diff --git a/.gitignore b/.gitignore index 56e1c443..2aba37ee 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ __pycache__/ # C extensions / Cython / Weave *.c -!*/_ext/src_fast_numerics.c +!*/_ext/src_numerics.c *.so # Distribution / packaging diff --git a/MANIFEST.in b/MANIFEST.in index 954cde83..746ade0a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -8,6 +8,5 @@ include *.ini pylintrc recursive-include pyunicorn *.pyx recursive-include pyunicorn/_ext recursive-include pyunicorn *.c -recursive-include pyunicorn *.h recursive-include tests *.py recursive-include examples *.py diff --git a/pyunicorn/timeseries/_ext/numerics.pyx b/pyunicorn/timeseries/_ext/numerics.pyx index b13f5ae8..c5e75b07 100644 --- a/pyunicorn/timeseries/_ext/numerics.pyx +++ b/pyunicorn/timeseries/_ext/numerics.pyx @@ -48,13 +48,80 @@ cdef extern from "time.h": cdef extern from "src_numerics.c": + void _manhattan_distance_matrix_fast(int ntime_x, int ntime_y, int dim, + double *x_embedded, double *y_embedded, float *distance) + void _euclidean_distance_matrix_fast(int ntime_x, int ntime_y, int dim, + double *x_embedded, double *y_embedded, float *distance) + void _supremum_distance_matrix_fast(int ntime_x, int ntime_y, int dim, + double *x_embedded, double *y_embedded, float *distance) void _test_pearson_correlation_fast(double *original_data, double *surrogates, float *correlation, int n_time, int N, double norm) void _test_pearson_correlation_slow(double *original_data, double *surrogates, float *correlation, int n_time, int N, double norm) + void _test_mutual_information_fast(int N, int n_time, int n_bins, + double scaling, double range_min, double *original_data, + double *surrogates, int *symbolic_original, int *symbolic_surrogates, + int *hist_original, int *hist_surrogates, int * hist2d, float *mi) + void _test_mutual_information_slow(int N, int n_time, int n_bins, + double scaling, double range_min, double *original_data, + double *surrogates, int *symbolic_original, int 
# cross_recurrence_plot =======================================================

def _check_crp_embeddings(int ntime_x, int ntime_y, int dim,
                          x_embedded, y_embedded):
    """
    Raise ``ValueError`` unless both embeddings have the advertised shape.

    The C distance routines index the raw buffers using only ``ntime_x``,
    ``ntime_y`` and ``dim``; a mismatch with the real array shapes would make
    them read outside the arrays, so it is rejected here.

    :arg int ntime_x: expected number of rows of ``x_embedded``.
    :arg int ntime_y: expected number of rows of ``y_embedded``.
    :arg int dim: expected number of columns of both embeddings.
    :arg x_embedded: 2D array holding the embedded time series x.
    :arg y_embedded: 2D array holding the embedded time series y.
    """
    if x_embedded.shape[0] != ntime_x or x_embedded.shape[1] != dim:
        raise ValueError(
            "x_embedded has shape (%d, %d), expected (%d, %d)"
            % (x_embedded.shape[0], x_embedded.shape[1], ntime_x, dim))
    if y_embedded.shape[0] != ntime_y or y_embedded.shape[1] != dim:
        raise ValueError(
            "y_embedded has shape (%d, %d), expected (%d, %d)"
            % (y_embedded.shape[0], y_embedded.shape[1], ntime_y, dim))


def _manhattan_distance_matrix_crp(
    int ntime_x, int ntime_y, int dim,
    np.ndarray[double, ndim=2, mode='c'] x_embedded not None,
    np.ndarray[double, ndim=2, mode='c'] y_embedded not None):
    """
    Return the Manhattan distance matrix between two embedded time series.

    Both inputs must be C-contiguous float64 arrays (enforced by the buffer
    declarations in the signature).

    :arg int ntime_x: number of state vectors in ``x_embedded``.
    :arg int ntime_y: number of state vectors in ``y_embedded``.
    :arg int dim: embedding dimension (number of columns of both inputs).
    :arg x_embedded: array of shape ``(ntime_x, dim)``.
    :arg y_embedded: array of shape ``(ntime_y, dim)``.
    :return: float32 array of shape ``(ntime_x, ntime_y)`` filled by
        ``_manhattan_distance_matrix_fast``.
    :raises ValueError: if an array shape disagrees with the given sizes.
    """
    cdef np.ndarray[float, ndim=2, mode='c'] distance

    _check_crp_embeddings(ntime_x, ntime_y, dim, x_embedded, y_embedded)
    distance = np.zeros((ntime_x, ntime_y), dtype="float32")

    # PyArray_DATA yields an untyped pointer; cast it to the element type
    # the C routine expects.
    _manhattan_distance_matrix_fast(
        ntime_x, ntime_y, dim,
        <double*> np.PyArray_DATA(x_embedded),
        <double*> np.PyArray_DATA(y_embedded),
        <float*> np.PyArray_DATA(distance))

    return distance


def _euclidean_distance_matrix_crp(
    int ntime_x, int ntime_y, int dim,
    np.ndarray[double, ndim=2, mode='c'] x_embedded not None,
    np.ndarray[double, ndim=2, mode='c'] y_embedded not None):
    """
    Return the Euclidean distance matrix between two embedded time series.

    Both inputs must be C-contiguous float64 arrays (enforced by the buffer
    declarations in the signature).

    :arg int ntime_x: number of state vectors in ``x_embedded``.
    :arg int ntime_y: number of state vectors in ``y_embedded``.
    :arg int dim: embedding dimension (number of columns of both inputs).
    :arg x_embedded: array of shape ``(ntime_x, dim)``.
    :arg y_embedded: array of shape ``(ntime_y, dim)``.
    :return: float32 array of shape ``(ntime_x, ntime_y)`` filled by
        ``_euclidean_distance_matrix_fast``.
    :raises ValueError: if an array shape disagrees with the given sizes.
    """
    cdef np.ndarray[float, ndim=2, mode='c'] distance

    _check_crp_embeddings(ntime_x, ntime_y, dim, x_embedded, y_embedded)
    distance = np.zeros((ntime_x, ntime_y), dtype="float32")

    # PyArray_DATA yields an untyped pointer; cast it to the element type
    # the C routine expects.
    _euclidean_distance_matrix_fast(
        ntime_x, ntime_y, dim,
        <double*> np.PyArray_DATA(x_embedded),
        <double*> np.PyArray_DATA(y_embedded),
        <float*> np.PyArray_DATA(distance))

    return distance


def _supremum_distance_matrix_crp(
    int ntime_x, int ntime_y, int dim,
    np.ndarray[double, ndim=2, mode='c'] x_embedded not None,
    np.ndarray[double, ndim=2, mode='c'] y_embedded not None):
    """
    Return the supremum distance matrix between two embedded time series.

    Both inputs must be C-contiguous float64 arrays (enforced by the buffer
    declarations in the signature).

    :arg int ntime_x: number of state vectors in ``x_embedded``.
    :arg int ntime_y: number of state vectors in ``y_embedded``.
    :arg int dim: embedding dimension (number of columns of both inputs).
    :arg x_embedded: array of shape ``(ntime_x, dim)``.
    :arg y_embedded: array of shape ``(ntime_y, dim)``.
    :return: float32 array of shape ``(ntime_x, ntime_y)`` filled by
        ``_supremum_distance_matrix_fast``.
    :raises ValueError: if an array shape disagrees with the given sizes.
    """
    cdef np.ndarray[float, ndim=2, mode='c'] distance

    _check_crp_embeddings(ntime_x, ntime_y, dim, x_embedded, y_embedded)
    distance = np.zeros((ntime_x, ntime_y), dtype="float32")

    # PyArray_DATA yields an untyped pointer; cast it to the element type
    # the C routine expects.
    _supremum_distance_matrix_fast(
        ntime_x, ntime_y, dim,
        <double*> np.PyArray_DATA(x_embedded),
        <double*> np.PyArray_DATA(y_embedded),
        <float*> np.PyArray_DATA(distance))

    return distance
np.ndarray[FLOATTYPE_t, ndim=3] embedding_array, np.ndarray[FLOATTYPE_t, ndim=2] R, np.ndarray[FLOATTYPE_t, ndim=1] nR, @@ -176,6 +243,7 @@ def _twins( # Leave the while loop break + # recurrence plot============================================================== def _embed_time_series( @@ -201,8 +269,7 @@ def _embed_time_series( embedding[k, j] = time_series[index] index += 1 - -def _manhatten_distance_matrix( +def _manhattan_distance_matrix_rp( int n_time, int dim, np.ndarray[FLOAT32TYPE_t, ndim=2] embedding, np.ndarray[FLOAT32TYPE_t, ndim=2] distance): @@ -210,7 +277,7 @@ def _manhatten_distance_matrix( int j, k, l float sum - # Calculate the manhatten distance matrix + # Calculate the manhattan distance matrix for j in xrange(n_time): # Ignore the main diagonal, since every samle is neighbor of itself for k in xrange(j): @@ -221,7 +288,8 @@ def _manhatten_distance_matrix( distance[j, k] = distance[k, j] = sum -def _euclidean_distance_matrix( + +def _euclidean_distance_matrix_rp( int n_time, int dim, np.ndarray[FLOAT32TYPE_t, ndim=2] embedding, np.ndarray[FLOAT32TYPE_t, ndim=2] distance): @@ -240,7 +308,8 @@ def _euclidean_distance_matrix( sum += diff * diff distance[j, k] = distance[k, j] = sum -def _supremum_distance_matrix( + +def _supremum_distance_matrix_rp( int n_time, int dim, np.ndarray[FLOAT32TYPE_t, ndim=2] embedding, np.ndarray[FLOAT32TYPE_t, ndim=2] distance): @@ -525,6 +594,7 @@ def _vertline_dist_norqa_missingvalues( vertline[k] += 1 k = 0 + def _vertline_dist_norqa( int n_time, np.ndarray[INT32TYPE_t, ndim=1] vertline, np.ndarray[INT8TYPE_t, ndim=2] recmat): @@ -586,6 +656,7 @@ def _vertline_dist_rqa_missingvalues( vertline[k] += 1 k = 0 + def _vertline_dist_rqa( int n_time, np.ndarray[INT32TYPE_t, ndim=1] vertline, np.ndarray[FLOAT32TYPE_t, ndim=2] embedding, float eps, int dim): @@ -635,7 +706,8 @@ def _white_vertline_dist( white_vertline[k] += 1 k = 0 -def _twins( + +def _twins_r( int min_dist, int N, np.ndarray[INT8TYPE_t, ndim=2] R, 
def _twin_surrogates(int n_surrogates, int N, twins,
                     np.ndarray[FLOATTYPE_t, ndim=2] original_data):
    """
    Build one twin surrogate trajectory for every row of ``original_data``.

    Each surrogate starts at a random sample of its own series and then
    walks forward in time; whenever the current sample has twins, the walk
    either continues with the next sample or jumps to the successor of one
    of the twins, all options being equally likely.

    :arg int n_surrogates: number of rows (time series) to process.
    :arg int N: number of samples per series and per surrogate.
    :arg twins: nested sequence; ``twins[i][k]`` is the list of twin indices
        of sample ``k`` in series ``i``.
    :arg original_data: array indexed as ``[series, sample]``.
    :return: array of shape ``(n_surrogates, N)`` holding the surrogates.

    NOTE(review): recurrence_plot.py also imports ``_twin_surrogates``; its
    call site is not visible here - confirm it uses this signature.
    """
    cdef int i, j, k, n_twins, rand
    cdef np.ndarray[FLOATTYPE_t, ndim=2] surrogates = \
        np.empty((n_surrogates, N))

    # The random generator is deliberately NOT reseeded here: seeding with
    # the current time on every call made results irreproducible even when
    # the caller had seeded the generator, and a datetime object is not an
    # accepted seed type for ``random.seed`` on recent Python versions.

    for i in xrange(n_surrogates):
        # Get the twin list for time series i
        twins_i = twins[i]

        # Randomly choose a starting point in the original trajectory
        k = int(floor(random.random() * N))

        for j in xrange(N):
            # Copy the current sample into the surrogate trajectory
            surrogates[i, j] = original_data[i, k]

            # Twins of sample k in the original time series
            twins_ik = twins_i[k]
            n_twins = len(twins_ik)

            # If k has twins at m, choose among all m+1 and k+1 with equal
            # probability; without twins simply advance to k+1.
            if n_twins > 0:
                # Random integer in [0, n_twins]
                rand = int(floor(random.random() * (n_twins + 1)))

                # rand == n_twins means "stay on the current branch";
                # otherwise jump to the chosen twin before advancing.
                if rand < n_twins:
                    k = twins_ik[rand]
            k += 1

            # Past the end of the series: restart from a new random sample.
            # A single draw suffices because the draw is always < N <= k.
            if k >= N:
                k = int(floor(random.random() * N))

    return surrogates
_test_pearson_correlation( np.ndarray[double, ndim=2, mode='c'] original_data not None, @@ -755,6 +829,64 @@ def _test_pearson_correlation( return correlation +def _test_mutual_information( + np.ndarray[double, ndim=2, mode='c'] original_data not None, + np.ndarray[double, ndim=2, mode='c'] surrogates not None, + int N, int n_time, int n_bins, fast): + + cdef: + # Get common range for all histograms + double range_min = np.min((original_data.min(), surrogates.min())) + double range_max = np.max((original_data.max(), surrogates.max())) + # Rescale all time series to the interval [0,1], using the maximum + # range of the whole dataset + double scaling = 1. / (range_max - range_min) + # Create arrays to hold symbolic trajectories + np.ndarray[int, ndim=2, mode='c'] symbolic_original = \ + np.empty((N, n_time), dtype="int32") + np.ndarray[int, ndim=2, mode='c'] symbolic_surrogates = \ + np.empty((N, n_time), dtype="int32") + # Initialize array to hold 1d-histograms of individual time series + np.ndarray[int, ndim=2, mode='c'] hist_original = \ + np.zeros((N, n_bins), dtype="int32") + np.ndarray[int, ndim=2, mode='c'] hist_surrogates = \ + np.zeros((N, n_bins), dtype="int32") + # Initialize array to hold 2d-histogram for one pair of time series + np.ndarray[int, ndim=2, mode='c'] hist2d = \ + np.zeros((n_bins, n_bins), dtype="int32") + # Initialize mutual information array + np.ndarray[float, ndim=2, mode='c'] mi = np.zeros((N, N), + dtype="float32") + + if (fast==True): + # original_data and surrogates must be contiguous Numpy arrays for + # this code to work correctly! + # All other arrays are generated from scratch in this method and + # are guaranteed to be contiguous by np. 
+ _test_mutual_information_fast( + N, n_time, n_bins, scaling, range_min, + np.PyArray_DATA(original_data), + np.PyArray_DATA(surrogates), + np.PyArray_DATA(symbolic_original), + np.PyArray_DATA(symbolic_surrogates), + np.PyArray_DATA(hist_original), + np.PyArray_DATA(hist_surrogates), + np.PyArray_DATA(hist2d), + np.PyArray_DATA(mi)) + else: + _test_mutual_information_slow( + N, n_time, n_bins, scaling, range_min, + np.PyArray_DATA(original_data), + np.PyArray_DATA(surrogates), + np.PyArray_DATA(symbolic_original), + np.PyArray_DATA(symbolic_surrogates), + np.PyArray_DATA(hist_original), + np.PyArray_DATA(hist_surrogates), + np.PyArray_DATA(hist2d), + np.PyArray_DATA(mi)) + return mi + + # visibitly graph ============================================================= def _visibility_relations_missingvalues( @@ -837,6 +969,7 @@ def _visibility_relations_horizontal( for i in xrange(N-1): A[i, i+1] = A[i+1, i] = 1 + def _retarded_local_clustering( int N, np.ndarray[INT16TYPE_t, ndim=2] A, np.ndarray[FLOATTYPE_t, ndim=1] norm, @@ -861,6 +994,7 @@ def _retarded_local_clustering( retarded_clustering[i] = counter / norm[i] + def _advanced_local_clustering( int N, np.ndarray[INT16TYPE_t, ndim=2] A, np.ndarray[FLOATTYPE_t, ndim=1] norm, diff --git a/pyunicorn/timeseries/_ext/src_numerics.c b/pyunicorn/timeseries/_ext/src_numerics.c index f60ac176..124f49d6 100644 --- a/pyunicorn/timeseries/_ext/src_numerics.c +++ b/pyunicorn/timeseries/_ext/src_numerics.c @@ -8,6 +8,70 @@ * License: BSD (3-clause) */ + +// cross_recurrence_plot ====================================================== + +void _manhattan_distance_matrix_fast(int ntime_x, int ntime_y, int dim, + double *x_embedded, double *y_embedded, float *distance) { + + // Calculate the manhattan distance matrix + for (int j = 0; j < ntime_x; j++) { + for (int k = 0; k < ntime_y; k++) { + float sum = 0; + for (int l = 0; l < dim; l++) { + // Use manhattan norm + sum += fabs(x_embedded[j*ntime_x+l] - y_embedded[k*ntime_y+l]); 
+ } + distance[j*ntime_x+k] = sum; + } + } +} + + +void _euclidean_distance_matrix_fast(int ntime_x, int ntime_y, int dim, + double *x_embedded, double *y_embedded, float *distance) { + + // Calculate the euclidean distance matrix + for (int j = 0; j < ntime_x; j++) { + for (int k = 0; k < ntime_y; k++) { + float sum = 0; + for (int l = 0; l < dim; l++) { + // Use euclidean norm + float diff = fabs(x_embedded[j*ntime_x+l] - + y_embedded[k*ntime_y+l]); + sum += diff * diff; + } + distance[j*ntime_x+k] = sqrt(sum); + } + } +} + + +void _supremum_distance_matrix_fast(int ntime_x, int ntime_y, int dim, + double *x_embedded, double *y_embedded, float *distance) { + + float temp_diff, diff; + + // Calculate the supremum distance matrix + + for (int j = 0; j < ntime_x; j++) { + for (int k = 0; k < ntime_y; k++) { + temp_diff = diff = 0; + for (int l = 0; l < dim; l++) { + // Use supremum norm + temp_diff = fabs(x_embedded[j*ntime_x+l] - + y_embedded[k*ntime_y+l]); + if (temp_diff > diff) + diff = temp_diff; + } + distance[j*ntime_x+k] = diff; + } + } +} + + +// surrogates ================================================================= + void _test_pearson_correlation_fast(double *original_data, double *surrogates, float *correlation, int n_time, int N, double norm) { @@ -55,3 +119,267 @@ void _test_pearson_correlation_slow(double *original_data, double *surrogates, } } } + + +void _test_mutual_information_fast(int N, int n_time, int n_bins, + double scaling, double range_min, double *original_data, + double *surrogates, int *symbolic_original, int *symbolic_surrogates, + int *hist_original, int *hist_surrogates, int * hist2d, float *mi) { + + long i, j, k, l, m, in_bins, jn_bins, in_time, jn_time; + double norm, rescaled, hpl, hpm, plm; + + double *p_original, *p_surrogates; + float *p_mi; + long *p_symbolic_original, *p_symbolic_surrogates, *p_hist_original, + *p_hist_surrogates, *p_hist2d; + + // Calculate histogram norm + norm = 1.0 / n_time; + + // Initialize 
in_bins, in_time + in_time = in_bins = 0; + + for (i = 0; i < N; i++) { + + // Set pointer to original_data(i,0) + p_original = original_data + in_time; + // Set pointer to surrogates(i,0) + p_surrogates = surrogates + in_time; + // Set pointer to symbolic_original(i,0) + p_symbolic_original = symbolic_original + in_time; + // Set pointer to symbolic_surrogates(i,0) + p_symbolic_surrogates = symbolic_surrogates + in_time; + + for (k = 0; k < n_time; k++) { + + // Rescale sample into interval [0,1] + rescaled = scaling * (*p_original - range_min); + + // Calculate symbolic trajectories for each time series, + // where the symbols are bin numbers. + if (rescaled < 1.0) + *p_symbolic_original = rescaled * n_bins; + else + *p_symbolic_original = n_bins - 1; + + // Calculate 1d-histograms for single time series + // Set pointer to hist_original(i, *p_symbolic_original) + p_hist_original = hist_original + in_bins + + *p_symbolic_original; + (*p_hist_original)++; + + // Rescale sample into interval [0,1] + rescaled = scaling * (*p_surrogates - range_min); + + // Calculate symbolic trajectories for each time series, + // where the symbols are bin numbers. 
+ if (rescaled < 1.0) + *p_symbolic_surrogates = rescaled * n_bins; + else + *p_symbolic_surrogates = n_bins - 1; + + // Calculate 1d-histograms for single time series + // Set pointer to hist_surrogates(i, *p_symbolic_surrogates) + p_hist_surrogates = hist_surrogates + in_bins + + *p_symbolic_surrogates; + (*p_hist_surrogates)++; + + // Set pointer to original_data(i,k+1) + p_original++; + // Set pointer to surrogates(i,k+1) + p_surrogates++; + // Set pointer to symbolic_original(i,k+1) + p_symbolic_original++; + // Set pointer to symbolic_surrogates(i,k+1) + p_symbolic_surrogates++; + } + in_bins += n_bins; + in_time += n_time; + } + + // Initialize in_time, in_bins + in_time = in_bins = 0; + + for (i = 0; i < N; i++) { + + // Set pointer to mi(i,0) + p_mi = mi + i*N; + + // Initialize jn_time = 0; + jn_time = jn_bins = 0; + + for (j = 0; j < N; j++) { + + // Don't do anything if i = j, this case is not of + // interest here! + if (i != j) { + + // Set pointer to symbolic_original(i,0) + p_symbolic_original = symbolic_original + in_time; + // Set pointer to symbolic_surrogates(j,0) + p_symbolic_surrogates = symbolic_surrogates + jn_time; + + // Calculate 2d-histogram for one pair of time series + // (i,j). 
+ for (k = 0; k < n_time; k++) { + + // Set pointer to hist2d(*p_symbolic_original, + // *p_symbolic_surrogates) + p_hist2d = hist2d + (*p_symbolic_original)*n_bins + + *p_symbolic_surrogates; + + (*p_hist2d)++; + + // Set pointer to symbolic_original(i,k+1) + p_symbolic_original++; + // Set pointer to symbolic_surrogates(j,k+1) + p_symbolic_surrogates++; + } + + // Calculate mutual information for one pair of time + // series (i,j) + + // Set pointer to hist_original(i,0) + p_hist_original = hist_original + in_bins; + + for (l = 0; l < n_bins; l++) { + + // Set pointer to hist_surrogates(j,0) + p_hist_surrogates = hist_surrogates + jn_bins; + // Set pointer to hist2d(l,0) + p_hist2d = hist2d + l*n_bins; + + hpl = (*p_hist_original) * norm; + + if (hpl > 0.0) { + for (m = 0; m < n_bins; m++) { + + hpm = (*p_hist_surrogates) * norm; + + if (hpm > 0.0) { + plm = (*p_hist2d) * norm; + if (plm > 0.0) + *p_mi += plm * log(plm/hpm/hpl); + } + + // Set pointer to hist_surrogates(j,m+1) + p_hist_surrogates++; + // Set pointer to hist2d(l,m+1) + p_hist2d++; + } + } + // Set pointer to hist_original(i,l+1) + p_hist_original++; + } + + // Reset hist2d to zero in all bins + for (l = 0; l < n_bins; l++) { + + // Set pointer to hist2d(l,0) + p_hist2d = hist2d + l*n_bins; + + for (m = 0; m < n_bins; m++) { + *p_hist2d = 0; + + // Set pointer to hist2d(l,m+1) + p_hist2d++; + } + } + } + // Set pointer to mi(i,j+1) + p_mi++; + + jn_time += n_time; + jn_bins += n_bins; + } + in_time += n_time; + in_bins += n_bins; + } +} + + +void _test_mutual_information_slow(int N, int n_time, int n_bins, + double scaling, double range_min, double *original_data, + double *surrogates, int *symbolic_original, int *symbolic_surrogates, + int *hist_original, int *hist_surrogates, int * hist2d, float *mi) { + + int i, j, k, l, m; + int symbol, symbol_i, symbol_j; + double rescaled, norm, hpl, hpm, plm; + + // Calculate histogram norm + norm = 1.0 / n_time; + + for (i = 0; i < N; i++) { + for (k = 0; k 
< n_time; k++) { + + // Original time series + // Calculate symbolic trajectories for each time series, + // where the symbols are bins + rescaled = scaling * (original_data[i*N+k] - range_min); + + if (rescaled< 1.0) + symbolic_original[i*N+k] = rescaled * n_bins; + else + symbolic_original[i*N+k] = n_bins - 1; + + // Calculate 1d-histograms for single time series + symbol = symbolic_original[i*N+k]; + hist_original[i*N+symbol] += 1; + + // Surrogate time series + // Calculate symbolic trajectories for each time series, + // where the symbols are bins + rescaled = scaling * (surrogates[i*N+k] - range_min); + + if (rescaled < 1.0) + symbolic_surrogates[i*N+k] = rescaled * n_bins; + else + symbolic_surrogates[i*N+k] = n_bins - 1; + + // Calculate 1d-histograms for single time series + symbol = symbolic_surrogates[i*N+k]; + hist_surrogates[i*N+symbol] += 1; + } + } + + for (i = 0; i < N; i++) { + for (j = 0; j < N; j++) { + + // The case i = j is not of interest here! + if (i != j) { + // Calculate 2d-histogram for one pair of time series + // (i,j). + for (k = 0; k < n_time; k++) { + symbol_i = symbolic_original[i*N+k]; + symbol_j = symbolic_surrogates[j*N+k]; + hist2d[symbol_i*n_bins+symbol_j] += 1; + } + + // Calculate mutual information for one pair of time + // series (i,j). 
+ for (l = 0; l < n_bins; l++) { + hpl = hist_original[i*N+l] * norm; + if (hpl > 0.0) { + for (m = 0; m < n_bins; m++) { + hpm = hist_surrogates[j*N+m] * norm; + if (hpm > 0.0) { + plm = hist2d[l*n_bins+m] * norm; + if (plm > 0.0) { + mi[i*N+j] += plm * log(plm/hpm/hpl); + } + } + } + } + } + + // Reset hist2d to zero in all bins + for (l = 0; l < n_bins; l++) { + for (m = 0; m < n_bins; m++) + hist2d[l*n_bins+m] = 0; + } + } + } + } +} diff --git a/pyunicorn/timeseries/cross_recurrence_plot.py b/pyunicorn/timeseries/cross_recurrence_plot.py index d26e05da..47eb0298 100644 --- a/pyunicorn/timeseries/cross_recurrence_plot.py +++ b/pyunicorn/timeseries/cross_recurrence_plot.py @@ -15,10 +15,10 @@ # array object and fast numerics import numpy as np - from .recurrence_plot import RecurrencePlot -from .. import weave_inline # C++ inline code +from pyunicorn.timeseries._ext.numerics import _manhattan_distance_matrix_crp,\ + _euclidean_distance_matrix_crp, _supremum_distance_matrix_crp # # Class definitions @@ -111,9 +111,9 @@ def __init__(self, x, y, metric="supremum", normalize=False, """The length of the embedded time series y.""" # Store time series - self.x = x.copy().astype("float32") + self.x = x.copy() """The time series x.""" - self.y = y.copy().astype("float32") + self.y = y.copy() """The time series y.""" # Reshape time series @@ -218,30 +218,8 @@ def manhattan_distance_matrix(self, x_embedded, y_embedded): ntime_x = x_embedded.shape[0] ntime_y = y_embedded.shape[0] dim = x_embedded.shape[1] - - distance = np.zeros((ntime_x, ntime_y), dtype="float32") - - code = r""" - int j, k, l; - float sum; - - // Calculate the manhattan distance matrix - - for (j = 0; j < ntime_x; j++) { - for (k = 0; k < ntime_y; k++) { - sum = 0; - for (l = 0; l < dim; l++) { - // Use manhattan norm - sum += fabs(x_embedded(j,l) - y_embedded(k,l)); - } - distance(j,k) = sum; - } - } - """ - weave_inline(locals(), code, - ['ntime_x', 'ntime_y', 'dim', 'x_embedded', 'y_embedded', - 
'distance']) - return distance + return _manhattan_distance_matrix_crp(ntime_x, ntime_y, dim, x_embedded, + y_embedded) def euclidean_distance_matrix(self, x_embedded, y_embedded): """ @@ -260,31 +238,8 @@ def euclidean_distance_matrix(self, x_embedded, y_embedded): ntime_x = x_embedded.shape[0] ntime_y = y_embedded.shape[0] dim = x_embedded.shape[1] - - distance = np.zeros((ntime_x, ntime_y), dtype="float32") - - code = r""" - int j, k, l; - float sum, diff; - - // Calculate the euclidean distance matrix - - for (j = 0; j < ntime_x; j++) { - for (k = 0; k < ntime_y; k++) { - sum = 0; - for (l = 0; l < dim; l++) { - // Use euclidean norm - diff = fabs(x_embedded(j,l) - y_embedded(k,l)); - sum += diff * diff; - } - distance(j,k) = sqrt(sum); - } - } - """ - weave_inline(locals(), code, - ['ntime_x', 'ntime_y', 'dim', 'x_embedded', 'y_embedded', - 'distance']) - return distance + return _euclidean_distance_matrix_crp(ntime_x, ntime_y, dim, x_embedded, + y_embedded) def supremum_distance_matrix(self, x_embedded, y_embedded): """ @@ -303,33 +258,8 @@ def supremum_distance_matrix(self, x_embedded, y_embedded): ntime_x = x_embedded.shape[0] ntime_y = y_embedded.shape[0] dim = x_embedded.shape[1] - - distance = np.zeros((ntime_x, ntime_y), dtype="float32") - - code = r""" - int j, k, l; - float temp_diff, diff; - - // Calculate the supremum distance matrix - - for (j = 0; j < ntime_x; j++) { - for (k = 0; k < ntime_y; k++) { - temp_diff = diff = 0; - for (l = 0; l < dim; l++) { - // Use supremum norm - temp_diff = fabs(x_embedded(j,l) - y_embedded(k,l)); - - if (temp_diff > diff) - diff = temp_diff; - } - distance(j,k) = diff; - } - } - """ - weave_inline(locals(), code, - ['ntime_x', 'ntime_y', 'dim', 'x_embedded', 'y_embedded', - 'distance']) - return distance + return _supremum_distance_matrix_crp(ntime_x, ntime_y, dim, x_embedded, + y_embedded) def set_fixed_threshold(self, threshold): """ diff --git a/pyunicorn/timeseries/recurrence_plot.py 
b/pyunicorn/timeseries/recurrence_plot.py index 8aaf9d61..cf2bce39 100644 --- a/pyunicorn/timeseries/recurrence_plot.py +++ b/pyunicorn/timeseries/recurrence_plot.py @@ -19,15 +19,15 @@ # C++ inline code from pyunicorn.timeseries._ext.numerics import \ - _embed_time_series, _manhatten_distance_matrix, \ - _euclidean_distance_matrix, _supremum_distance_matrix, \ + _embed_time_series, _manhattan_distance_matrix_rp, \ + _euclidean_distance_matrix_rp, _supremum_distance_matrix_rp, \ _set_adaptive_neighborhood_size, _bootstrap_distance_matrix_manhatten, \ _bootstrap_distance_matrix_euclidean, _bootstrap_distance_matrix_supremum,\ _diagline_dist_norqa_missingvalues, _diagline_dist_norqa, \ _diagline_dist_rqa_missingvalues, _diagline_dist_rqa, \ _vertline_dist_norqa_missingvalues, _vertline_dist_norqa, \ _vertline_dist_rqa_missingvalues, _vertline_dist_rqa, _rejection_sampling,\ - _white_vertline_dist, _twins, _twin_surrogates + _white_vertline_dist, _twins_r, _twin_surrogates # # Class definitions @@ -444,7 +444,7 @@ def manhattan_distance_matrix(self, embedding): (n_time, dim) = embedding.shape distance = np.zeros((n_time, n_time), dtype="float32") - _manhatten_distance_matrix(n_time, dim, embedding, distance) + _manhattan_distance_matrix_rp(n_time, dim, embedding, distance) return distance def euclidean_distance_matrix(self, embedding): @@ -463,7 +463,7 @@ def euclidean_distance_matrix(self, embedding): (n_time, dim) = embedding.shape distance = np.zeros((n_time, n_time), dtype="float32") - _euclidean_distance_matrix(n_time, dim, embedding, distance) + _euclidean_distance_matrix_rp(n_time, dim, embedding, distance) distance = np.sqrt(distance) return distance @@ -483,7 +483,7 @@ def supremum_distance_matrix(self, embedding): (n_time, dim) = embedding.shape distance = np.zeros((n_time, n_time), dtype="float32") - _supremum_distance_matrix(n_time, dim, embedding, distance) + _supremum_distance_matrix_rp(n_time, dim, embedding, distance) return distance def 
set_fixed_threshold(self, threshold): @@ -1386,7 +1386,7 @@ def twins(self, min_dist=7): # Get number of neighbors for each state vector nR = R.sum(axis=0) - _twins(min_dist, N, R, nR, twins) + _twins_r(min_dist, N, R, nR, twins) return twins def twin_surrogates(self, n_surrogates=1, min_dist=7): diff --git a/pyunicorn/timeseries/surrogates.py b/pyunicorn/timeseries/surrogates.py index 974c8370..7d5b5bee 100644 --- a/pyunicorn/timeseries/surrogates.py +++ b/pyunicorn/timeseries/surrogates.py @@ -16,11 +16,9 @@ from numpy import random from pyunicorn.timeseries._ext.numerics import \ - _embed_time_series_array, _recurrence_plot, _twins, \ - _test_pearson_correlation + _embed_time_series_array, _recurrence_plot, _twins_s, \ + _twin_surrogates, _test_pearson_correlation, _test_mutual_information -# C++ inline code -from .. import weave_inline # easy progress bar handling from ..utils import progressbar @@ -245,7 +243,7 @@ def twins(self, embedding_array, threshold, min_dist=7): # Initialize array to store the number of neighbors for each sample nR = np.empty(n_time) - _twins(N, n_time, dimension, threshold, min_dist, embedding_array, R, + _twins_s(N, n_time, dimension, threshold, min_dist, embedding_array, R, nR, twins) return twins @@ -514,71 +512,8 @@ def twin_surrogates(self, original_data, dimension, delay, threshold, self._twins = twins self._twins_cached = True - surrogates = np.empty(original_data.shape) - code = r""" - int i, j, k, new_k, n_twins, rand; - - // Initialize random number generator - srand48(time(0)); - - for (i = 0; i < N; i++) { - // Get the twin list for time series i - py::list twins_i = PyList_GetItem(twins, i); - - // Randomly choose a starting point in the original_data - // trajectory. - k = floor(drand48() * n_time); - - j = 0; - - while (j < n_time) { - surrogates(i,j) = original_data(i,k); - - // Get the list of twins of sample k in the original_data - // time series. 
- py::list twins_ik = PyList_GetItem(twins_i,k); - - // Get the number of twins of k - n_twins = PyList_Size(twins_ik); - - // If k has no twins, go to the next sample k+1. If k has - // twins at m, choose among m+1 and k+1 with equal probability - if (n_twins == 0) - k++; - else { - // Generate a random integer between 0 and n_twins - rand = floor(drand48() * (n_twins + 1)); - - // If rand = n_twins go to sample k+1, otherwise jump - // to the future of one of the twins. - if (rand == n_twins) - k++; - else { - k = twins_ik[rand]; - k++; - } - - } - - // If the new k >= n_time, choose a new random starting point - // in the original_data time series. - if (k >= n_time) { - do { - new_k = floor(drand48() * n_time); - } - while (k == new_k); - - k = new_k; - } - - j++; - } - } - """ - weave_inline(locals(), code, - ['N', 'n_time', 'original_data', 'twins', 'surrogates']) - return surrogates + return _twin_surrogates(N, n_time, twins, original_data) # # Defines methods to generate correlation measure matrices based on @@ -664,299 +599,13 @@ def test_mutual_information(original_data, surrogates, n_bins=32, :return: the mutual information test matrix. """ (N, n_time) = original_data.shape - - # Get common range for all histograms - range_min = float(np.min(original_data.min(), surrogates.min())) - range_max = float(np.max(original_data.max(), surrogates.max())) - - # Rescale all time series to the interval [0,1], using the maximum - # range of the whole dataset - scaling = 1. 
/ (range_max - range_min) - - # Create arrays to hold symbolic trajectories - symbolic_original = np.empty(original_data.shape, dtype="int32") - symbolic_surrogates = np.empty(original_data.shape, dtype="int32") - - # Initialize array to hold 1d-histograms of individual time series - hist_original = np.zeros((N, n_bins), dtype="int32") - hist_surrogates = np.zeros((N, n_bins), dtype="int32") - - # Initialize array to hold 2d-histogram for one pair of time series - hist2d = np.zeros((n_bins, n_bins), dtype="int32") - - # Initialize mutual information array - mi = np.zeros((N, N), dtype="float32") - # Calculate symbolic time series and histograms # Calculate 2D histograms and mutual information # mi[i,j] gives the mutual information between the ith original_data # time series and the jth surrogate time series. - code = r""" - int i, j, k, l, m; - int symbol, symbol_i, symbol_j; - double rescaled, norm, hpl, hpm, plm; - - // Calculate histogram norm - norm = 1.0 / n_time; - - for (i = 0; i < N; i++) { - for (k = 0; k < n_time; k++) { - - // Original time series - // Calculate symbolic trajectories for each time series, - // where the symbols are bins - rescaled = scaling * (original_data(i,k) - range_min); - - if (rescaled< 1.0) - symbolic_original(i,k) = rescaled * n_bins; - else - symbolic_original(i,k) = n_bins - 1; - - // Calculate 1d-histograms for single time series - symbol = symbolic_original(i,k); - hist_original(i,symbol) += 1; - - // Surrogate time series - // Calculate symbolic trajectories for each time series, - // where the symbols are bins - rescaled = scaling * (surrogates(i,k) - range_min); - - if (rescaled < 1.0) - symbolic_surrogates(i,k) = rescaled * n_bins; - else - symbolic_surrogates(i,k) = n_bins - 1; - - // Calculate 1d-histograms for single time series - symbol = symbolic_surrogates(i,k); - hist_surrogates(i,symbol) += 1; - } - } - - for (i = 0; i < N; i++) { - for (j = 0; j < N; j++) { - - // The case i = j is not of interest here! 
- if (i != j) { - // Calculate 2d-histogram for one pair of time series - // (i,j). - for (k = 0; k < n_time; k++) { - symbol_i = symbolic_original(i,k); - symbol_j = symbolic_surrogates(j,k); - hist2d(symbol_i,symbol_j) += 1; - } - - // Calculate mutual information for one pair of time - // series (i,j). - for (l = 0; l < n_bins; l++) { - hpl = hist_original(i,l) * norm; - if (hpl > 0.0) { - for (m = 0; m < n_bins; m++) { - hpm = hist_surrogates(j,m) * norm; - if (hpm > 0.0) { - plm = hist2d(l,m) * norm; - if (plm > 0.0) { - mi(i,j) += plm * log(plm/hpm/hpl); - } - } - } - } - } - - // Reset hist2d to zero in all bins - for (l = 0; l < n_bins; l++) { - for (m = 0; m < n_bins; m++) - hist2d(l,m) = 0; - } - } - } - } - """ - - # original_data and surrogates must be contiguous Numpy arrays for - # this code to work correctly! - # All other arrays are generated from scratch in this method and - # are guaranteed to be contiguous by np. - fastCode = r""" - long i, j, k, l, m, in_bins, jn_bins, in_time, jn_time; - double norm, rescaled, hpl, hpm, plm; - - double *p_original, *p_surrogates; - float *p_mi; - long *p_symbolic_original, *p_symbolic_surrogates, *p_hist_original, - *p_hist_surrogates, *p_hist2d; - - // Calculate histogram norm - norm = 1.0 / n_time; - - // Initialize in_bins, in_time - in_time = in_bins = 0; - - for (i = 0; i < N; i++) { - - // Set pointer to original_data(i,0) - p_original = original_data + in_time; - // Set pointer to surrogates(i,0) - p_surrogates = surrogates + in_time; - // Set pointer to symbolic_original(i,0) - p_symbolic_original = symbolic_original + in_time; - // Set pointer to symbolic_surrogates(i,0) - p_symbolic_surrogates = symbolic_surrogates + in_time; - - for (k = 0; k < n_time; k++) { - - // Rescale sample into interval [0,1] - rescaled = scaling * (*p_original - range_min); - - // Calculate symbolic trajectories for each time series, - // where the symbols are bin numbers. 
- if (rescaled < 1.0) - *p_symbolic_original = rescaled * n_bins; - else - *p_symbolic_original = n_bins - 1; - - // Calculate 1d-histograms for single time series - // Set pointer to hist_original(i, *p_symbolic_original) - p_hist_original = hist_original + in_bins - + *p_symbolic_original; - (*p_hist_original)++; - - // Rescale sample into interval [0,1] - rescaled = scaling * (*p_surrogates - range_min); - - // Calculate symbolic trajectories for each time series, - // where the symbols are bin numbers. - if (rescaled < 1.0) - *p_symbolic_surrogates = rescaled * n_bins; - else - *p_symbolic_surrogates = n_bins - 1; - - // Calculate 1d-histograms for single time series - // Set pointer to hist_surrogates(i, *p_symbolic_surrogates) - p_hist_surrogates = hist_surrogates + in_bins - + *p_symbolic_surrogates; - (*p_hist_surrogates)++; - - // Set pointer to original_data(i,k+1) - p_original++; - // Set pointer to surrogates(i,k+1) - p_surrogates++; - // Set pointer to symbolic_original(i,k+1) - p_symbolic_original++; - // Set pointer to symbolic_surrogates(i,k+1) - p_symbolic_surrogates++; - } - in_bins += n_bins; - in_time += n_time; - } - - // Initialize in_time, in_bins - in_time = in_bins = 0; - - for (i = 0; i < N; i++) { - - // Set pointer to mi(i,0) - p_mi = mi + i*N; - - // Initialize jn_time = 0; - jn_time = jn_bins = 0; - - for (j = 0; j < N; j++) { - - // Don't do anything if i = j, this case is not of - // interest here! - if (i != j) { - - // Set pointer to symbolic_original(i,0) - p_symbolic_original = symbolic_original + in_time; - // Set pointer to symbolic_surrogates(j,0) - p_symbolic_surrogates = symbolic_surrogates + jn_time; - - // Calculate 2d-histogram for one pair of time series - // (i,j). 
- for (k = 0; k < n_time; k++) { - - // Set pointer to hist2d(*p_symbolic_original, - // *p_symbolic_surrogates) - p_hist2d = hist2d + (*p_symbolic_original)*n_bins - + *p_symbolic_surrogates; - - (*p_hist2d)++; - - // Set pointer to symbolic_original(i,k+1) - p_symbolic_original++; - // Set pointer to symbolic_surrogates(j,k+1) - p_symbolic_surrogates++; - } - - // Calculate mutual information for one pair of time - // series (i,j) - - // Set pointer to hist_original(i,0) - p_hist_original = hist_original + in_bins; - - for (l = 0; l < n_bins; l++) { - - // Set pointer to hist_surrogates(j,0) - p_hist_surrogates = hist_surrogates + jn_bins; - // Set pointer to hist2d(l,0) - p_hist2d = hist2d + l*n_bins; - - hpl = (*p_hist_original) * norm; - - if (hpl > 0.0) { - for (m = 0; m < n_bins; m++) { - - hpm = (*p_hist_surrogates) * norm; - - if (hpm > 0.0) { - plm = (*p_hist2d) * norm; - if (plm > 0.0) - *p_mi += plm * log(plm/hpm/hpl); - } - - // Set pointer to hist_surrogates(j,m+1) - p_hist_surrogates++; - // Set pointer to hist2d(l,m+1) - p_hist2d++; - } - } - // Set pointer to hist_original(i,l+1) - p_hist_original++; - } - - // Reset hist2d to zero in all bins - for (l = 0; l < n_bins; l++) { - - // Set pointer to hist2d(l,0) - p_hist2d = hist2d + l*n_bins; - - for (m = 0; m < n_bins; m++) { - *p_hist2d = 0; - - // Set pointer to hist2d(l,m+1) - p_hist2d++; - } - } - } - // Set pointer to mi(i,j+1) - p_mi++; - - jn_time += n_time; - jn_bins += n_bins; - } - in_time += n_time; - in_bins += n_bins; - } - """ - args = ['n_time', 'N', 'n_bins', 'scaling', 'range_min', - 'original_data', 'surrogates', 'symbolic_original', - 'symbolic_surrogates', 'hist_original', 'hist_surrogates', - 'hist2d', 'mi'] - if fast: - weave_inline(locals(), fastCode, args, blitz=False) - else: - weave_inline(locals(), code, args) - return mi + return _test_mutual_information(original_data.copy(order='c'), + surrogates.copy(order='c'), N, n_time, + n_bins, fast) # # Define methods to perform 
significance tests on correlation measures diff --git a/pyunicorn/timeseries/visibility_graph.py b/pyunicorn/timeseries/visibility_graph.py index c6f9f4b6..7bd78f91 100644 --- a/pyunicorn/timeseries/visibility_graph.py +++ b/pyunicorn/timeseries/visibility_graph.py @@ -19,6 +19,7 @@ _visibility_relations_missingvalues,\ _visibility_relations_no_missingvalues, _visibility_relations_horizontal,\ _retarded_local_clustering, _advanced_local_clustering + from .. import InteractingNetworks diff --git a/tests/test_timeseries/TestTimeseries.py b/tests/test_timeseries/TestTimeseries.py index ed4b85a8..f073163a 100644 --- a/tests/test_timeseries/TestTimeseries.py +++ b/tests/test_timeseries/TestTimeseries.py @@ -12,21 +12,17 @@ import numpy as np +from pyunicorn.timeseries import CrossRecurrencePlot from pyunicorn.timeseries import Surrogates from pyunicorn.core.data import Data from numpy.testing import assert_array_equal from numpy.testing import assert_array_almost_equal - -# ----------------------------------------------------------------------------- -# surrogates -# ----------------------------------------------------------------------------- - # turn off for weave compilation & error detection parallel = False -def test_TestPearsonCorrelation(): +def create_test_data(): # Create test time series tdata = Data.SmallTestData().observable() n_index, n_times = tdata.shape @@ -34,9 +30,51 @@ def test_TestPearsonCorrelation(): tdata -= np.mean(tdata, axis=1)[:,None] # normalize the data tdata /= np.sqrt(np.sum(tdata*tdata, axis=1))[:,None] + return tdata - norm = 1.0 / float(n_times) +# ----------------------------------------------------------------------------- +# cross_recurrence_plot +# ----------------------------------------------------------------------------- + +def testManhattanDistanceMatrix(): + tdata = create_test_data() + n_index, n_times = tdata.shape + c = CrossRecurrencePlot(x=tdata, y=tdata, threshold=1.0) + manh_dist = c.manhattan_distance_matrix(tdata.T, 
tdata.T) + +def testEuclideanDistanceMatrix(): + tdata = create_test_data() + n_index, n_times = tdata.shape + c = CrossRecurrencePlot(x=tdata, y=tdata, threshold=1.0) + eucl_dist = c.euclidean_distance_matrix(tdata.T, tdata.T) + +def testSupremumDistanceMatrix(): + tdata = create_test_data() + n_index, n_times = tdata.shape + c = CrossRecurrencePlot(x=tdata, y=tdata, threshold=1.0) + supr_dist = c.supremum_distance_matrix(tdata.T, tdata.T) + + +# ----------------------------------------------------------------------------- +# surrogates +# ----------------------------------------------------------------------------- + +def testTwinSurrogates(): + tdata = create_test_data() + n_index, n_times = tdata.shape + s = Surrogates(tdata) + tsurro = s.twin_surrogates(tdata, 1, 0, 0.2) + corrcoef = np.corrcoef(tdata, tsurro)[n_index:,:n_index] + for i in xrange(n_index): + corrcoef[i,i]=0.0 + assert (corrcoef>=-1.0).all() and (corrcoef<=1.0).all() + + +def test_TestPearsonCorrelation(): + tdata = create_test_data() + n_index, n_times = tdata.shape + norm = 1.0 / float(n_times) c = Surrogates.test_pearson_correlation(tdata, tdata, fast=True) corrcoef = np.corrcoef(tdata, tdata)[n_index:,:n_index]*norm for i in xrange(n_index): @@ -44,3 +82,11 @@ def test_TestPearsonCorrelation(): assert c.shape == (n_index, n_index) assert_array_almost_equal(c, corrcoef, decimal=5) + + +def test_TestMutualInformation(): + tdata = create_test_data() + n_bins=32 + test_mi = Surrogates.test_mutual_information(tdata[:1], tdata[:1], + n_bins=n_bins, fast=False) + assert (test_mi>=-1.0).all() and (test_mi<=1.0).all()