From da0f2c5a58efa6fdfc59d4482679be796e4996cf Mon Sep 17 00:00:00 2001 From: jakobrunge Date: Fri, 10 Sep 2021 16:02:50 +0200 Subject: [PATCH 01/49] new version 4.3: removed cython dep, added numbe dep, fixed mask doc string --- environment_py3.yml | 7 +- setup.py | 37 +------- tigramite/data_processing.py | 56 ++++++++--- tigramite/independence_tests/cmiknn.py | 95 ++++++++++++------- .../independence_tests_base.py | 23 ++--- tigramite/models.py | 26 ++--- tigramite/pcmci.py | 3 +- .../tigramite_tutorial_missing_masking.ipynb | 83 ++++++++-------- tutorials/tigramite_tutorial_prediction.ipynb | 2 +- 9 files changed, 183 insertions(+), 149 deletions(-) diff --git a/environment_py3.yml b/environment_py3.yml index 8b02bac1..4e423639 100644 --- a/environment_py3.yml +++ b/environment_py3.yml @@ -1,5 +1,6 @@ name: tigramite4_conda dependencies: - - python=3 - - numpy>=1.17.0 - - scipy>=1.3.0 + - python=3.7 + - numpy>=1.21.2 + - scipy>=1.7.1 + - numba>=0.53.1 diff --git a/setup.py b/setup.py index 0e0e8a94..0173f6c8 100644 --- a/setup.py +++ b/setup.py @@ -28,35 +28,6 @@ def run(self): # Call original build_ext command build_ext.run(self) - -# Handle cythonizing code only in development mode -def define_extension(extension_name, source_files=None): - """ - Will define an extension from the *.c files unless in "setup.py develop" - is called. If this is in develop mode, then it tries to import cython - and regenerate the *.c files from the *.pyx files - :return: single-element list of needed extension - """ - # Default source file - if source_files is None: - source_files = [str((pathlib.Path(__file__).parent / extension_name.replace(".", "/")).with_suffix(".c"))] - # If we are, try to import and use cythonize - try: - from Cython.Build import cythonize - # Return the cythonized extension - pyx_path = str((pathlib.Path(__file__).parent / extension_name.replace(".", "/")).with_suffix(".pyx")) - return cythonize([pyx_path], language_level = "3") - except ImportError: - print( - "Cython cannot be found. 
Skipping generation of C code from" - + " cython and using pre-compiled C code instead" - ) - return [Extension(extension_name, source_files, - extra_compile_args=['-fopenmp'], - extra_link_args=['-fopenmp'],)] - - - with open("README.md", "r", encoding="utf-8") as fh: long_description = fh.read() @@ -79,18 +50,15 @@ def define_extension(extension_name, source_files=None): "torch>=1.7", "gpytorch>=1.4", "dcor>=0.5.3"] EXTRAS_REQUIRE["test"] = TESTS_REQUIRE # Define the extras needed for development -EXTRAS_REQUIRE["dev"] = EXTRAS_REQUIRE["all"] + TESTS_REQUIRE + ["cython"] +EXTRAS_REQUIRE["dev"] = EXTRAS_REQUIRE["all"] # Use a custom build to handle numpy.include_dirs() when building CMDCLASS = {"build_ext": UseNumpyHeadersBuildExt} -# Define the external modules to build -EXT_MODULES = [] -EXT_MODULES += define_extension("tigramite.tigramite_cython_code") # Run the setup setup( name="tigramite", - version="4.2.2.1", + version="4.3.1.1", packages=["tigramite", "tigramite.independence_tests"], license="GNU General Public License v3.0", description="Tigramite causal discovery for time series", @@ -101,7 +69,6 @@ def define_extension(extension_name, source_files=None): long_description_content_type="text/markdown", keywords="causal inference, causal discovery, prediction, time series", cmdclass=CMDCLASS, - ext_modules=EXT_MODULES, install_requires=INSTALL_REQUIRES, extras_require=EXTRAS_REQUIRE, test_suite="tests", diff --git a/tigramite/data_processing.py b/tigramite/data_processing.py index 1ae526b7..40931540 100644 --- a/tigramite/data_processing.py +++ b/tigramite/data_processing.py @@ -11,6 +11,7 @@ import numpy as np import scipy.sparse import scipy.sparse.linalg +from numba import jit class DataFrame(): """Data object containing time series array and optional mask. @@ -69,6 +70,9 @@ def __init__(self, data, mask=None, missing_flag=None, var_names=None, # raise ValueError("NaNs in the data") self._check_mask() + self.T = T + self.N = N + # If PCMCI.run_bootstrap_of is called, then the # bootstrap random draw can be set here self.bootstrap = None @@ -131,10 +135,10 @@ def construct_array(self, X, Y, Z, tau_max, Optional mask array, must be of same shape as data. If it is set, then it overrides the self.mask assigned to the dataframe. If it is None, then the self.mask is used, if it exists. - mask_type : {'y','x','z','xy','xz','yz','xyz'} + mask_type : {None, 'y','x','z','xy','xz','yz','xyz'} Masking mode: Indicators for which variables in the dependence - measure I(X; Y | Z) the samples should be masked. If None, 'y' is - used, which excludes all time slices containing masked samples in Y. + measure I(X; Y | Z) the samples should be masked. If None, the mask + is not used. Explained in [1]_. return_cleaned_xyz : bool, optional (default: False) Whether to return cleaned X,Y,Z, where possible duplicates are @@ -323,8 +327,8 @@ def print_array_info(self, array, X, Y, Z, missing_flag, mask_type): Supplement of [1]_. mask_type : {'y','x','z','xy','xz','yz','xyz'} Masking mode: Indicators for which variables in the dependence - measure I(X; Y | Z) the samples should be masked. If None, 'y' is - used, which excludes all time slices containing masked samples in Y. + measure I(X; Y | Z) the samples should be masked. If None, the mask + is not used. Explained in [1]_. 
""" indt = " " * 12 @@ -332,7 +336,7 @@ def print_array_info(self, array, X, Y, Z, missing_flag, mask_type): "\n" + indt + "X = %s" % str(X) + "\n" + indt + "Y = %s" % str(Y) + "\n" + indt + "Z = %s" % str(Z)) - if self.mask is not None: + if self.mask is not None and mask_type is not None: print(indt+"with masked samples in %s removed" % mask_type) if self.missing_flag is not None: print(indt+"with missing values = %s removed" % self.missing_flag) @@ -522,6 +526,38 @@ def time_bin_with_mask(data, time_bin_length, mask=None): return (bindata.squeeze(), T) +@jit +def _get_patterns(array, array_mask, patt, patt_mask, weights, dim, step, fac, N, T): + v = np.zeros(dim, dtype='float') + + start = step * (dim - 1) + for n in range(0, N): + for t in range(start, T): + mask = 1 + ave = 0. + for k in range(0, dim): + tau = k * step + v[k] = array[t - tau, n] + ave += v[k] + mask *= array_mask[t - tau, n] + ave /= dim + var = 0. + for k in range(0, dim): + var += (v[k] - ave) ** 2 + var /= dim + weights[t - start, n] = var + if (v[0] < v[1]): + p = 1 + else: + p = 0 + for i in range(2, dim): + for j in range(0, i): + if (v[j] < v[i]): + p += fac[i] + patt[t - start, n] = p + patt_mask[t - start, n] = mask + + return patt, patt_mask, weights def ordinal_patt_array(array, array_mask=None, dim=2, step=1, weights=False, verbosity=0): @@ -603,11 +639,7 @@ def ordinal_patt_array(array, array_mask=None, dim=2, step=1, # _get_patterns_cython assumes mask=0 to be a masked value array_mask = (array_mask == False).astype('int32') - (patt, patt_mask, weights_array) = \ - tigramite_cython_code._get_patterns_cython(array, array_mask, - patt, patt_mask, - weights_array, dim, - step, fac, N, T) + (patt, patt_mask, weights_array) = _get_patterns(array, array_mask, patt, patt_mask, weights_array, dim, step, fac, N, T) weights_array = np.asarray(weights_array) patt = np.asarray(patt) @@ -1430,4 +1462,4 @@ def nonlin_f(x): return (x + 5. * x**2 * np.exp(-x**2 / 20.)) noises = [np.random.randn, np.random.randn, np.random.randn] data, nonstat = structural_causal_process(links, T=100, noises=noises) - print(data.shape) \ No newline at end of file + print(data.shape) diff --git a/tigramite/independence_tests/cmiknn.py b/tigramite/independence_tests/cmiknn.py index 226e3420..0db8e0ac 100644 --- a/tigramite/independence_tests/cmiknn.py +++ b/tigramite/independence_tests/cmiknn.py @@ -5,14 +5,10 @@ # License: GNU General Public License v3.0 from __future__ import print_function -from scipy import special, stats, spatial +from scipy import special, spatial import numpy as np -import warnings from .independence_tests_base import CondIndTest -try: - from tigramite import tigramite_cython_code -except Exception as e: - warnings.warn(str(e)) +from numba import jit class CMIknn(CondIndTest): @@ -105,7 +101,7 @@ def __init__(self, shuffle_neighbors=5, significance='shuffle_test', transform='ranks', - n_jobs=-1, + workers=-1, **kwargs): # Set the member variables self.knn = knn @@ -115,7 +111,7 @@ def __init__(self, self.two_sided = False self.residual_based = False self.recycle_residuals = False - self.n_jobs = n_jobs + self.workers = workers # Call the parent constructor CondIndTest.__init__(self, significance=significance, **kwargs) # Print some information about construction @@ -126,6 +122,7 @@ def __init__(self, print("knn = %s" % self.knn) print("shuffle_neighbors = %d\n" % self.shuffle_neighbors) + @jit def _get_nearest_neighbors(self, array, xyz, knn): """Returns nearest neighbors according to Frenzel and Pompe (2007). 
@@ -175,25 +172,36 @@ def _get_nearest_neighbors(self, array, xyz, knn): elif self.transform == 'ranks': array = array.argsort(axis=1).argsort(axis=1).astype('float') + array = array.T + tree_xyz = spatial.cKDTree(array) + epsarray = tree_xyz.query(array, k=[knn+1], p=np.inf, + eps=0., workers=self.workers)[0][:, 0].astype('float') - # Use cKDTree to get distances eps to the k-th nearest neighbors for - # every sample in joint space XYZ with maximum norm - tree_xyz = spatial.cKDTree(array.T) - epsarray = tree_xyz.query(array.T, k=knn+1, p=np.inf, - eps=0., n_jobs=self.n_jobs)[0][:, knn].astype('float') + # To search neighbors < eps + epsarray = np.multiply(epsarray, 0.99999) + + # Subsample indices + x_indices = np.where(xyz == 0)[0] + y_indices = np.where(xyz == 1)[0] + z_indices = np.where(xyz == 2)[0] + + # Find nearest neighbors in subspaces + xz = array[:, np.concatenate((x_indices, z_indices))] + tree_xz = spatial.cKDTree(xz) + k_xz = tree_xz.query_ball_point(xz, r=epsarray, eps=0., p=np.inf, workers=self.workers, return_length=True) + + yz = array[:, np.concatenate((y_indices, z_indices))] + tree_yz = spatial.cKDTree(yz) + k_yz = tree_yz.query_ball_point(yz, r=epsarray, eps=0., p=np.inf, workers=self.workers, return_length=True) + + if len(z_indices) > 0: + z = array[:, z_indices] + tree_z = spatial.cKDTree(z) + k_z = tree_z.query_ball_point(z, r=epsarray, eps=0., p=np.inf, workers=self.workers, return_length=True) + else: + # Number of neighbors is T when z is empty. + k_z = np.full(T, T, dtype='float') - # Prepare for fast cython access - dim_x = int(np.where(xyz == 0)[0][-1] + 1) - dim_y = int(np.where(xyz == 1)[0][-1] + 1 - dim_x) - - k_xz, k_yz, k_z = \ - tigramite_cython_code._get_neighbors_within_eps_cython(array, - T, - dim_x, - dim_y, - epsarray, - knn, - dim) return k_xz, k_yz, k_z def get_dependence_measure(self, array, xyz): @@ -291,14 +299,13 @@ class which is a block shuffle test, which does not preserve # Generate random order in which to go through indices loop in # next step order = self.random_state.permutation(T).astype('int32') - # print(order[:5]) + # Shuffle neighbor indices for each sample index - for i in range(T): - self.random_state.shuffle(neighbors[i]) + neighbors = self.random_state.permuted(neighbors, axis=1) + # Select a series of neighbor indices that contains as few as # possible duplicates - restricted_permutation = \ - tigramite_cython_code._get_restricted_permutation_cython( + restricted_permutation = self.get_restricted_permutation( T=T, shuffle_neighbors=self.shuffle_neighbors, neighbors=neighbors, @@ -319,11 +326,11 @@ class which is a block shuffle test, which does not preserve sig_blocklength=self.sig_blocklength, verbosity=self.verbosity) - # Sort - null_dist.sort() pval = (null_dist >= value).mean() if return_null_dist: + # Sort + null_dist.sort() return pval, null_dist return pval @@ -393,8 +400,8 @@ def get_conditional_entropy(self, array, xyz): x_array = np.fastCopyAndTranspose(array[x_indices, :]) tree_xyz = spatial.cKDTree(x_array) - epsarray = tree_xyz.query(x_array, k=knn_here+1, p=np.inf, - eps=0., n_jobs=self.n_jobs)[0][:, knn_here].astype('float') + epsarray = tree_xyz.query(x_array, k=[knn_here+1], p=np.inf, + eps=0., workers=self.workers)[0][:, 0].astype('float') h_x = - special.digamma(knn_here) + special.digamma(T) + dim_x * np.log(2.*epsarray).mean() @@ -409,3 +416,23 @@ def get_conditional_entropy(self, array, xyz): h_x_y = h_x - i_xy return h_x_y + + + @jit + def get_restricted_permutation(self, T, shuffle_neighbors, 
neighbors, order): + + restricted_permutation = np.zeros(T, dtype='int32') + used = np.array([], dtype='int32') + + for sample_index in order: + m = 0 + use = neighbors[sample_index, m] + + while ((use in used) and (m < shuffle_neighbors - 1)): + m += 1 + use = neighbors[sample_index, m] + + restricted_permutation[sample_index] = use + used = np.append(used, use) + + return restricted_permutation diff --git a/tigramite/independence_tests/independence_tests_base.py b/tigramite/independence_tests/independence_tests_base.py index 9a2b2a06..fdb1a527 100644 --- a/tigramite/independence_tests/independence_tests_base.py +++ b/tigramite/independence_tests/independence_tests_base.py @@ -27,11 +27,10 @@ class CondIndTest(): Seed for RandomState (default_rng) mask_type : str, optional (default = None) - Must be in {'y','x','z','xy','xz','yz','xyz'} + Must be in {None, 'y','x','z','xy','xz','yz','xyz'} Masking mode: Indicators for which variables in the dependence measure - I(X; Y | Z) the samples should be masked. If None, 'y' is used, which - excludes all time slices containing masked samples in Y. Explained in - [1]_. + I(X; Y | Z) the samples should be masked. If None, the mask is not used. + Explained in [1]_. significance : str, optional (default: 'analytic') Type of significance test to use. In this package 'analytic', @@ -133,10 +132,9 @@ def set_mask_type(self, mask_type): Parameters ---------- mask_type : str - Must be in {'y','x','z','xy','xz','yz','xyz'} - Masking mode: Indicators for which variables in the dependence - measure I(X; Y | Z) the samples should be masked. If None, 'y' is - used, which excludes all time slices containing masked samples in Y. + Must be in {None, 'y','x','z','xy','xz','yz','xyz'} + Masking mode: Indicators for which variables in the dependence measure + I(X; Y | Z) the samples should be masked. If None, the mask is not used. Explained in [1]_. """ # Set the mask type @@ -183,10 +181,9 @@ def print_info(self): def _check_mask_type(self): """ mask_type : str, optional (default = None) - Must be in {'y','x','z','xy','xz','yz','xyz'} - Masking mode: Indicators for which variables in the dependence - measure I(X; Y | Z) the samples should be masked. If None, 'y' is - used, which excludes all time slices containing masked samples in Y. + Must be in {None, 'y','x','z','xy','xz','yz','xyz'} + Masking mode: Indicators for which variables in the dependence measure + I(X; Y | Z) the samples should be masked. If None, the mask is not used. Explained in [1]_. """ if self.mask_type is not None: @@ -954,8 +951,6 @@ def _get_shuffle_dist(self, array, xyz, dependence_measure, null_dist[sam] = dependence_measure(array=array_shuffled, xyz=xyz) - null_dist.sort() - return null_dist def get_fixed_thres_significance(self, value, fixed_thres): diff --git a/tigramite/models.py b/tigramite/models.py index 50d8ca96..64771e67 100644 --- a/tigramite/models.py +++ b/tigramite/models.py @@ -43,11 +43,11 @@ class Models(): Used to transform data prior to fitting. For example, sklearn.preprocessing.StandardScaler for simple standardization. The fitted parameters are stored. - mask_type : {'y','x','z','xy','xz','yz','xyz'} - Masking mode: Indicators for which variables in the dependence measure - I(X; Y | Z) the samples should be masked. If None, 'y' is used, which - excludes all time slices containing masked samples in Y. Explained in - [1]_. 
+ mask_type : {None, 'y','x','z','xy','xz','yz','xyz'} + Masking mode: Indicators for which variables in the dependence + measure I(X; Y | Z) the samples should be masked. If None, the mask + is not used. + Explained in [1]_. verbosity : int, optional (default: 0) Level of verbosity. """ @@ -319,7 +319,7 @@ def get_fit(self, all_parents, if all_parents[j]: this_parent_lag = np.abs(np.array(all_parents[j])[:, 1]).max() max_parents_lag = max(max_parents_lag, this_parent_lag) - # Set the default tau max and check if it shoudl be overwritten + # Set the default tau_max and check if it should be overwritten self.tau_max = max_parents_lag if tau_max is not None: self.tau_max = tau_max @@ -476,11 +476,11 @@ class LinearMediation(Models): Used to transform data prior to fitting. For example, sklearn.preprocessing.StandardScaler for simple standardization. The fitted parameters are stored. - mask_type : {'y','x','z','xy','xz','yz','xyz'} - Masking mode: Indicators for which variables in the dependence measure - I(X; Y | Z) the samples should be masked. If None, 'y' is used, which - excludes all time slices containing masked samples in Y. Explained in - [1]_. + mask_type : {None, 'y','x','z','xy','xz','yz','xyz'} + Masking mode: Indicators for which variables in the dependence + measure I(X; Y | Z) the samples should be masked. If None, the mask + is not used. + Explained in [1]_. verbosity : int, optional (default: 0) Level of verbosity. """ @@ -528,7 +528,9 @@ def fit_model(self, all_parents, tau_max=None): var, lag = parent if lag == 0: raise ValueError("all_parents cannot contain " - "contemporaneous links. Remove these.") + "contemporaneous links for the LinearMediation" + " class. Use the optimal causal effects " + "class.") # Fit the model using the base class self.fit_results = self.get_fit(all_parents=all_parents, diff --git a/tigramite/pcmci.py b/tigramite/pcmci.py index 54f6d65a..859eca75 100644 --- a/tigramite/pcmci.py +++ b/tigramite/pcmci.py @@ -155,7 +155,8 @@ def __init__(self, dataframe, "functions run_pcmci() etc.") # Store the shape of the data in the T and N variables - self.T, self.N = self.dataframe.values.shape + self.T = self.dataframe.T + self.N = self.dataframe.N def _set_sel_links(self, selected_links, tau_min, tau_max, remove_contemp=False): diff --git a/tutorials/tigramite_tutorial_missing_masking.ipynb b/tutorials/tigramite_tutorial_missing_masking.ipynb index 9510a3c6..c90a920b 100644 --- a/tutorials/tigramite_tutorial_missing_masking.ipynb +++ b/tutorials/tigramite_tutorial_missing_masking.ipynb @@ -42,7 +42,7 @@ "from tigramite import plotting as tp\n", "from tigramite.pcmci import PCMCI\n", "from tigramite.independence_tests import ParCorr, GPDC, CMIknn, CMIsymb\n", - "from tigramite.models import LinearMediation, Prediction\n" + "from tigramite.models import LinearMediation, Prediction" ] }, { @@ -56,7 +56,45 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO2dd7hdRdWH318aAUJoobfQm1IDBBCQTkINvTchdAFFilEBURCkqIAgSBUIHYGAgdB7SZAmRTqE3ovIp+D6/lhz4XByetv73rve5znPvXv27Nlrzy5rZs2aNTIzgiAIgiBv9MlagCAIgiAoRSioIAiCIJeEggqCIAhySSioIAiCIJeEggqCIAhySSioIAiCIJf0y1qAckh6GfgU+Ar40syGZStREARB0Elyq6ASa5rZe1kLEQRBEHSeMPEFQRAEuSTPCsqAmyVNkjS6VAZJoyVNTL8nOyxfEARB0EaU11BHkuY0szckzQpMAA4ws7sq5J8Y41RBEAQ9h9z2oMzsjfT3HeAaYMVsJQqCIAg6SS4VlKRpJU3X9T+wHhAmvCAIgl5EXr34ZgOukQQu4yVmNj5bkYIgCIJOkksFZWYvAktnLUcQBEGQHbk08QVBEARBKKggCIIgl4SCCoIgCHJJKKggCIIySOor6XZJc2YtS28kFFQQBEF5VgS+D2yTsRy9klBQQRAE5dkEeBDYLmtBeiOhoIIgCMqzMXAIMJ+khbIWprcRCioIWkQarxiQtRxBa5C0ADALcB9wBWHm6zihoIKgQeT8XNKNkp4FPgduy0COWTp9zl7CxsANZvY/YCxh5us4oaCCoHHWA3YAzgQ2B2YFviNptk4JIGkQ8KqkrVpYpiStkJTvDK0qt8y5ZpC0VjvP0QQbA9el/+8HBkv6boby9DpCQQVBA8gDRR4FHGlm15nZP8zsY3xpmBEdFGUd4A3g1LQ0TcNIGiRpH+AR4FJgOB4Tc6rmxSzLXsCFqT5zg6TpgZXw+0nqRV1K9KI6SiioIGiM9YDBwJVF6eOAjToox0bAqcAFwB+b/ND/ERgFHAosjHuwfQCcK6nl34ok6054z3PhVpffJBsAd5vZvwrSxgLb5k2Z9mRCQQVBnRT0nn5pZl8V7f4bsE4nnCWS0tgQuB44ElicBgfyU1kjgD3MbIKZ/S9d247A/MCvWyP1t1gKGARcBuTNzFdo3uviUeC/xNp0HaNXKihJW0haOWs5gm5Lud5T1wKbTwOrd0CO5YGPzOwFM/sC2BX4vaTZGyhrOeA9M3u1MNHM/o33pLaQtHezAhexI3AxcAs5UlCS+uHKelxhuvny42OB3aIX1Rl6pIKS1E/SuaVs5+nBOhG3rc/VeemC7kyV3lMXnTLzbUTBR9TMHgbOBk5poKz1gZtK7TCz94CRwC8ltaT3IKkvsD2uoG4H1myHGbFBVgVeMbPJJfadA3wPuFvS9zorVu8jLw9ESzGzL4GhwKYldg8H/g2cBlwmqX+rzitpbUnzt6q8IJeU7T0VMA7YuAOt7G8pqMTxwIgGHCbWo4yCAjCz54F9gEtb5Nn3feAtM3sq9do+BpasdICkWSSt1oJzV2NT3Gw6BWb2Gr5W3VnARZJuiAm87aNHKqjEn4EflEjfHrgEOBZ/KX7TipNJWg+4ETiuFeUFuWVv4MQKvSeAx4EBwGLtEiL1/ucH7i1MN7NPgWvx57zWsgbjJr67KuUzs6vwZ/zPLVC+OwEXFWzfRgUzn6R5gXuAq5IJri2kBmtXz64kZvaVmV0ILArcgfemYlyqDfRkBXUNMEzSfF0J6cHeGhib3EZ3AjaXtEUzJ5K0PP5AjwLWTS9TUCfJNHtWXieepo/XmvhHuixprKIlZj5J00o6QdIESdMV7NoQGJ+sBcWcj49H1cqawINFHmvlOARYAO9NVUXSypKuk/Q7SdOktGnwXsrYgqxlFZSkRYC78flmL5XL1yJGAC+Y2T+rZTSz/zOz3wKjgRskbdBGuXolPVZBpcHdscBuBclrAy+Z2QspzwfAVsAZkpZr5Dype389sKeZ3Yi7+x7QjOy9mB2AXYATshakDCviz8/bNeRtWkFJGgE8CcwJvA5cXTCuWsq818WdwAySlqnxVGXHn4pJzhjbAEdLWqqC7CtLGo+/gzcCswGT0nu2Ca4Q3yo45HZg9TQ2VVjOMngv5WgzOwWfi7RtjdfVCLsD59ZzgJldjyvcCyTt1BapOkAK1dWxSeY1YWY94gdMLJG2DPAK0Ddtnw8cWCLfKOAtYKk6zzkEeAEYXZA2FHgfmC7rOulOP6Af8Bzu3vsasHodx/bBP7JH4q3ZDfFxgv4tlvFo4Pga804NfALM1MB5huAf9heA9Qrq52r8Az1ttbKBXwK/q/F8LzTw7P8IuKjMvpHAm+leDChI3w54B3gZ2LHEcU8Cwwq250/v5ZYFaXPhc7OmasMzOCvwUaPvLrAE3pBYs9WydeKXnpmXgD4l9vXBG+Ln443IeTsiUwVhBwL7pRdlPD5usx8wddYVWUbeKRRUSp+UPl5TAx8Cc5TJtw0+I3/xOs55AnBmifQrgB9mXSdN1ud3gTEdPN9OwJ3p/y2AfxR+3MocMwfw0/RSPYKP//0Zn4v0LB6eZs4Kxy8JHAM8BexSg4z3A2vXcU3XUdB4Kdq3Zfog7wz0K0jfMH3kTgKmKTpmIN6buL+rriqce8GkDKrV4ULpuVed92vm9DGfscS+8cDOZY6bB/gDMKjEvj8AhxZc6yTgoBL57gQ2acMz+CPg/CbL2BBX+NO2Wr52/vCoGW8l2VctsX+N9J7sA1yenq330u8j4FNgr5bLVUHgy/EW0LzA9OnvnsAVWVdmGXnLKah907VsCUyoUsZOwGRg4RrONz3eU5qvxL6V043um3W9NFGfVwD/AYZ04Fx9k0JZO20LNwsdVuGY7wDv4t5Uy5fY3wdXXq8D3ytIH4KPozyJ99ROAg4H7qgi44zpJRxYx3Utn17kBYrS50npo3HHhGfTs3cWrmzXqPLcPUIJS0CJvHcBm1XJs1+jH2W8N7dfUdoC6b7UXE8Fx26Gj6sBnJGewSkUZ/pIXtLiZ1DpmShb93WUdRFwcivla+cPmCY9g1sCY4A/lshzDnBIwXYf3Gw7CzBT+ua9TpUGUYly++Hj97OX3F/hwLvqSW9DpW2QKu154PAa8pdTUDPgGv4OYLcaytkb98KaoptblO9Qypg40v4HgFFZP3xFMs1DbT2FBfGW0TW1fAhbINcOuIeWCtIWSDIMLXNP/wnsVEPZI4C3gV+kD8dH+Djhal33GO9df1TuJUl5tgD+1sC1HQg83PXi4sr4DuCItC18bPQ2fA7T4BrK7Fvt+Uz5dgeuKdgegk+Onbsg7Vpguwbv2zp4dIXC+3Y87uXYSHkz4qbL3dL9LVkXfGOKa1kvBVgBb1TW1ZMsU9YQ3MQ5vFXytfOHh8q6KP3f1cDoX7B/GipYnwryTQB2rfPch6fjStZ7pQN/hZv1dsIHNbvcQo/tQIX1TQ/LAri77mPAElWOKamg0r6/AF8A09
dwbqUPypYV8kyFm0XK2u1xk2FHlHkd9Xoh8CVpXKNCvlNxN/y1Ut03/dJWudfPAOuW2DcGN2cNLUjrg5vOTq3jHAvirfGDKDNuk571fSuU8Sfg4AauT0kJnJS2D8NNVG3vXQPTpQ/5KNxU/xHeM/0AtyqshU+1mKXB8vvgPb7l0/ZUeM+wqgWiQpmT8J7qd6vkuwnYuoV1dQbwsxaWtw1uEmv5WFmLn5F1cUvCjAVp9wEbFmxvTw2Ns1TWk7V+L/Axu5KN0K/zVClgSbxHcUT6+50OVdrKwE0F20eQWpwVjqmkoJbFvYBqPf9GVOhF4fOrKt4wvOv6FLB31g9hkmdRvGU0Cu+Kl/wo4WMLH+DjO10foOXaKNcO6YUoZcrph7ew3sM9I/vgPaF7qNOUUIMco4Dby+wTPrC/ZINlz4Q764xJH/CODDCnc5+VnuUDuj5C+ETj/fGGwQNNlv9z4IyCe3lzk+XtRoXGYVG+q1tUR9Pg5vp5WljvAv4K/LbWD3anf/g43ysUNVhxs+/FBdvjqaGXna7578DIGvL2xa1MZRuFZlUUVEFhc3W44rYE/lywvRNwWol8o4GJ6fdyix+uicAWJfb1SS92VU8dfAD6LUr0Djr9w3uRY9L/v8E9ckophTHAuQXbR1FHb6VOmdZJSnOVKvkWTUrp77hyrWhqaFCWLjPfbCX2LYyPTTb8ocHD53wJbJv1s1Agk2jS6Qk3G3+QPvL30CGzNm7m/ZgarCIVylgSD3v2VuEz30IZZ8ctEOc3W89tqsMDgetKpHeZUAfhUxw+oMhhp0KZO1BlPDflOwSfWlB5KKXGk06mxEB0GytuqxIKquJHkgo9qAZl2Cg9XH2K0jcDHqr1Y4UHDX2HKibKNtfnokmGwWl7AG7GLB7gHojbzpcsSJsf78G01FSR6vEdYLUa8/cB9qDADbkN9XQJsE+J9P1a8QFrh2LNww+4Ae8pvE6BR2IHznstcEADxw0Abk3yHgcs2kYZp8XNqxPpYM+5BrmmSe/6smX234ib9n5S+C2uodz+eK9shTL7+wHD0jdlwarl1XjSHfAu8BRdb3x86t4WV15LTXwNytDVi9q8IG0O3EZe1QRRVNbOwIvArJ18CAvOfzHw06K0hfHey+54T0+46fLGEsffBmxVsP1dfEyn0fkiO6aXo2ONnhrlGgXcViL9WnLU88nbD19N2ICjOnzeJdMzvHSdxx2SPsAd8bJN79aP0zNf1lxOB02BqQ6uqrB/x9TweJI65iSmYw9O34fFcFPsWcCDqUHwn/R395rKquOkK+OOAWOSltwNH2N5jip2xAYqr1/6oM/PN04SFe3/rVZQqcyNcS+locDpeFf3pEYebNzp5AFq8DzC1/X5DS2YaJoekndKKRM8QOjl+CDpu3gjZK0yD+vf0ou2T8r7XCMfbbw38hoZ9igryNZl5pu1IK0/bkpqu7t9d/2ld/Q2OjwUkM69A+7xV5OpD29kvgcskoGse1DG/IVHoniYDvRAcdPd21TwKUh5PqXMxN0q5U+XvhEv4VaJA3AT99z1Xl+9F7Yk8Fl6YSfice3qEr6Oc41MD94L1DBhtE0KSniP6UPcFNBwDyiVdR7ufVTRXJZu6mv44OQUExqL8vZLCm1wUXp/YBF8oLYWN/25ca+uUuNS0yTlfAM+B2eR9LLVPCcuXf8vkmKbvx3PTIvu+ViSYws+znEy8FDWcsWv4j07A48uX7UHgo8H1RQNpA1y9sOnzaxWIv0Z4FVq7Fk0KccRwKU11tXPG73Wlsha48lmw1v0H6QP7I14FOWG3FPbVOktV1AF1z7FbPlGbxo+t+hyyvTC8AnRH+DedOfgraqSihFv8V+P92z/lY77Oz5/7Iuk3K+qpuRqlP3o9LGeKm3PgjdUqg7+4uNHf0iyTeGEkKcfbq66G/cefDfdg5Z5d8WvLfdsIN5gPig9awNSo6p/Ub7huHkpszBkuBn95qK03fCpByvhDdOaHBIaPP/06blerIa8/WtR+m2trxov6rPUslw2bQtfFO1FcmKqaZeCaoOcA/EB2j+Vuvm4V9HJBfV8DN7rWK4o33S4F8wlXQ8SPkFwGD6/oO3zL3CzTrVIBYsnhXwXTXhcdfD+TI2bOi+ljYPn8Wv5fZsfN1sZPs7xOW75OC29D31SY6/q5O42yzkAn7IwPG1PhTsVrJK2r6RCBJUWnP8Y4C9Z36+a5a3xohYok75X0sbrZ34h3URBJVmnSy/LUUXpgykRPim1sCYnhbBhUkQP4aaNzMIp4XNpLiyRPhifN/cg3rs7jhy62Va4ro55osWvpfdNhY0+3Gz9S9w54Ql8vl1bhiTqlHNv4Ib0/4HA9QX7Fknf1LqDDNdw3q7QW3O3uux2/ZQEbxhJ6+BxsepdwbOlSJpoZsOylKEeJM2Of8B/YmaXp7SDgZXMbIrlBCQNwMf8foz3Sn6HezY2dwObIC2a9wQeIug/Ka0vPh/mHbyXeLOVXrMoCDpCenc2BR41s+dyIM9UuPl9R7ynvr6ZPVaw/wzgX2Z2SAvPOTU+nn6MmY2tlj8vNK2gACQtambPtkCeZmToVgoKvl7rZgIed/AxfAB1KzN7uMIxwntYL3dEyCpIug/4pZmNT9sH4uM4a5ovChkEQRGSfoh79o4zs+2L9s2Bu3cvZ2avtOh8p+CTbrfNslFbLy1ZsDBr5dRdMbNHcTPpX3Fz2auVlFM6xvKinBJX4QoJSUPx0Dd7hnIKgoqcjYeg+kXxDjN7Ew9ovEcrTiRpLTz4wT7dSTlBD15Rt7tgZlfjprBTcC+57sbVwGaS+uHXcaLVsFx2EPRmzOzfZvY9M3u+TJYb8FBgTSFpenx6yx7mK4h3K1pi4ssD3dHE10Uy240CrjWzr7KWp14kPYLPj1oeWNHM/puxSEHQrZE0EHeWmNvMPq6St085i4Wkk4AZzOwHbRCz7fTLWoDAzXZ4T6S7chXuLRXKKQhagJl9Iel+4Pt4qK1KnCnp32Z2YGGipIXx5dmXbI+U7SdMfEErOBsffJ2UtSBB0IO4hSpmvmTC2xo3s29ctPsE3OT+dpvkazuhoIKmMbN3zOyKrOUIgh7GLfgigJXYGp/4vz1wdvIARNKawDL4dJRuSyioIAiCfPIoMETSPBXy7I4vBXMvcCZwQXJYOhmPSPFFB+RsG6GggiAIckhyfLgNWLvUfklLAPPh8VHB51VNC9yMx+bs9laNUFBBEAT5ZQLlx6F2w0ONfQmQ/u6IR5r5UXeb81SKcDMPgiDIKZLmB+7HV2O2gvT+eOTzNYoDJVRyO+9uRA8qCIIgp5jZS7i57jtFu0YAz5eK4tNTlBOEggqCIMg7pdzNdwfOzUCWjhIKKgiCIN98raAkTSNpJD6Bt9s7QVQjIkkEQRDkm9uAcyXdDqyAr0x9gJl9mq1Y7ScUVBAEQY4xs/fT8hxvA3f3BsXURSioIAiCnGNm52UtQxbEGFQQBEGQS3KnoCQdJel1SY+m38isZQqCIAg6T15NfKeY2YlZCxEEQRBkR+56UEEQBEEA+VVQ+0t6XNK5kmYsl0nSa
EkTJU0EBnZQviAIgqDNZBKLT9ItwOwldo0BHgDeAww4Bo9BtXsHxQuCIAhyQK6DxUoaCowzs+I4VEEQBEEPJ3cmvq4VIROjgCezkiUIgiDIjjx68Z0gaRncxPcysFe24gRBEARZkGsTXxAEQdB7yZ2JLwiCIAggFFQQBEGQU0JBBUEQBLkkFFQQBEGQS0JBBUEQBLkkFFQQBEGQS0JBBUEQBLkkFFQQBEGQS0JBBUEQBLkkFFQQBEGQS0JBBUEQBLkkFFQQBEGQS/IYzRwASS8DnwJfAV+a2bBsJQqCIAg6SW4VVGJNM3svayGCIAiCzhMmviAIgiCX5FlBGXCzpEmSRpfKIGm0pInpFyvvBkEQ9CByu2ChpDnN7A1JswITgAPM7K4K+SfGOFUQBEHPIbc9KDN7I/19B7gGWDFbiYIgCIJOkksFJWlaSdN1/Q+sB4QJLwiCoBeRVy++2YBrJIHLeImZjc9WpCAIgqCT5FJBmdmLwNJZyxEEQWeRtCPwiZldl7UsQfbkUkEFQdD7kLQWcCLwpaTFgN9aXr24go6QyzGoIAjyjaQRkr7bwvLmAy4BtgeGA9sBZ0sa0KpzBN2PUFBBENSMpKkk/R44C7hF0vAWlDk1cDVwgpndZmaTgdWAWYG/hZLqvYSCCoKgJJJmTMqja3t+4G5gPmApYFfgOklrNHEOAWcC/wRO6Uo3s8+AUfgwxBaNlh90b0JBBUEwBUlxPA98JOkDSU8ADwFjgVFm9qGZ/Q3YFrhC0voNnGMQcC6u7PYoHm8ys6+A3wH7NXc1QXclFFQQBKWYFQ83NhBYFNgRWNnMTilUJGZ2G7AZcImkuWstXNIw4JF0jtXM7F9lsl4PzCtp2cYuI+jOhIIKgjYhaWNJP5Q0JGtZGmBh4Dlz3jWzx8zs+VIZzew+fAxpu1oKlnQQcCPwczPbPZnzSmJmX+ImwFz2oiSdJekHWcvRUwkFBUhaNpk0gqAlpOfpRGB94HlJ10jaMGOx6mEh3MRXK5cAO1TLJGlp4DBgBTO7rMay/wxsIWmmOuRpO5I2x8fHfhzfj/bQ6xWUpBmASeS0hRZ0W5YD+gIbAfMC44AzJG2QqVS1szDwXB357wSGSFqySr6fASea2Su1FpzicY4DdqtDnrYiaWbgNGBT/D6vnK1EPZNer6CA7wFPAUdKWiprYYIeww54iC4zs0/M7BzgQOAESX0zlq0W6upBmdn/cAeK7cvlScprddxkVy+nA/tKyss36/fAZWZ2D97D2zNjeXokebnZWbIGcDnwI+BSSdNkLE/QzUkKaFvc7FXIX4FPgJ1LHPMDSZt2QLxaWYj6elAAFwPbVzB3jQFOqeAQUYkHgQ+BzHugkjYBVsKvB+ACYDNJ02cnVc8kFJQrqDvN7C+4V9EpVfIHQTW+D7xpZs8UJibvt58AxxQ2hCTtDBwJnC7p8KzHM9L5F6a+MSiAx4DPgVVKlLkYsC7eE6qbVHenA/s2cnyrSEMCfwR2N7PPk2zvALdQo5NIUDu9WkGlJT2WxFtn4A//2pJiYmAPRc72ko5o42l2YMreEwBmdj9wP3BQkmcD4Ld4z2A4sDVwbsbRE2YB/mNmH9ZzUFIiF1PaWeKnwO/N7NMm5LoCWE3SLE2U0Sw7AHeb2d1F6WHmawO9WkHhLb1JZvYFgJl9greCzpAUCyR2YyRtKelASbMXpM0FXAscARzeteZYiWMb7sFIGojPC7q0QrYjgB9JGgFcCGxuZk+lED+rAzMCd0s6L3n/3S7p+BrPL0mDG5U/0UjvqYuxwFaS+hfItBAwEji1GaGSO/oNwJbNlNMkW1L63k4AZpa0XIfl6dH0CgUlaYCkBUvsWgP3PvoaM3sY+AFwbTJL9EokzZk+oN2OdN/OBFYAnpE0QdIvgb/jZtzl8ZA9m5Q4djrguaTMGmFD4O9m9nq5DGk+0cX4JNQ9zezegn2fAZvjHmJ3A38BTgL2klTL6gM74tEfHpR0tKTh5RwLJM0kafUSuxoZf+qS/yU8bNH6koamnurf8N7Tx42UWURXQNmOI2k2YBngpuJ9yUnkXGCPTsvVozGzHvEDJlbYNxp4HZiqKP0eYJ0yx+wKvAzMnfW1ZVCXfXHF/X5xneX9h8duexDYN21PA2yFK6ylC/LtBFxX5lkxPPROI+e/GvhBDfkGA6vXUe4TwLAqefrgHqkjgDWBE3BlcWqZ/JcDL5VI/xVwZBP3YF/gA+Bd4AzcU7ZPi+7vAOA9YL4Mnq19gIsr7J87Xfc0nZatp/4yF6BlF1JZQV0LfAzsVpA2DfAZMG2F4w7Fl5ofkvX1dbgujwDuAO7C465lLlMdsv8UN7dU/CAmBfExMGNBmvAe1kXA1Q2ce4ZU5gxtuK5TgUOq5BmFx8tTkUwvAxsV5d0SeAZ4B5i3aN+lwA5NyDo1sDYwoE33+E/AYRk8W7cBm1XJMx7YttOy5eWX3qEZgUUKn8OGy8v6glpYMSUVFDBV+mhsnVqXfVL6WsD9NVT20cCbwC5FL/7UeGv7J624EXn54Waxd4B5gN2Ba7KWqQ7Zl06t9nlrzH91UaNlBeAlYDbgo3o/sMBewJVturYtgHEV9gt4uFSDAl+64k1gtrQ9JG2vAlwJ7FiUfxKwYtb3s8K1rgE81uFzzpqeiamr5Nup0n3qTj88eEHJBiowFLgObwzeijdon8CnUXySnq+78YghjcuQdSW0sDLLKah1gXvTCzwJ2DilHwX8psayhwETcbPX99Kxb+Oz2yfiXlgqOqZ/Uor9s66bCte1dKqfqdL2IHzsYau0PTi9lDNnJN/UuNNAVUWBm34eA3ato/xtgPEF2+cAh6f/HwDWqlPeh4EN2lQXs6R70a/M/nUoaICV2P8r3MFA+DjOySn9AOCsgnzCG3QzZflsVqmLPsBkYMkOnnMvYGwN+Qal+zRL1vXUgmt+ArinzL7f4A4+66Vnb830PZkhPUN98bH81/Hx1poajVOcpwGhd8664srIVU5B/Q4Yk/7fBncRBbgdGFFH+X3xFsUr+GJti6f0mYDH8cCXXXkXxMdB3sXdT3PZwwLuA/6Bt3jG4eaJc4vyjCWN52Qg3xGpDj/CXYx3pox9H580eUM9dQ1Mmz7Gs6QX60O+6WX8Ag/JU2tZywCvAn3bWB9PUKZFipufyr6beIPpoVSPz3XVY/qoPFuQb1bg/ayfzRrq4rfArzp4vltwb8ta8l4C7Jd1HTV5vXOk9+FdYP6ifX2A14Dv1lDOINwKNZkCc3rNclQoeIkSvyWBe7OuvDLyllNQ/wSWTf/3A17EJ1J+Bgxu0blnT+c5CO/iv4uHtZkO77X9vBXnaXF9zYM7QQxISnYb4NfAoKJ8I6hiCm2TfANxM8F3cZPbbnij4qpiJYSbG94vfpFqPM+leOt4fzx0TVf6MOCpOso5FTiqzXVyKvCTEukr4eNMFXvruPv4+xQ4Z+ANrw+B2dP2KsCDWT+fNdTFsuldbnvjj296rxXNewX5R2bxzrT4mndK79ofSQ38gn1rUqeJFfdKPa9uOSoU+AnuNnle0W9yhypoA+BZfD7G
4TXkn0JBpRfyDb49drRferDLOlU0KO+8eO/qKb7tLTY7Pq6xa9YPXZG8B9bywOBK/S1gkQ7LtydwQ1HaVMCjwOii9GuBnzV4ns2S4nsSWLMgvQ9uxq2q9HBT5Pu02bMMH4e6oUT6X4H9ayxjChMh7u7eZdbdBbiok/e6wboQ7uSxcgfONZqCxksN+fvj47gLZl1PTVzzhcDeqcHydNE39ByqOOyUKG9Q+u6OrOu4CgU+SImxh1IvSBsqpy/wArAA34wtLFHlmFIK6kDgz0Vp0+Buqie1Qe6ZgIEl0hdLH7v1sn7wCmS6p9aHBTgZOKZgexje82jIrlzD+frgjZM1SuxbHO+hLpG2N8J7rw25w+M9tY/T+Yp7ZudTg3kTjy4wvpHz1ynrLEnWfgVpq+HmloZdm4FDgNPS/8fQ5p5gC+vjp8DZbT6HcCeALes87lTgF1nXUZ1TcBYAACAASURBVBPX/CY+VKGkWJZL+6bGXennbKDctdKzOn3Nx1QobDBttKdXuZCVgZsKto8AjqhyTCkFdRMl7MZ4iPzFO3xN38cHDMu6tVc4djNg7RbKMld6yGryUsPHV17C3UdPw3tU5+O9jna4VG+GN5BKmm/wyZCPJ3leBNZt8nx/oKhXltK3pgaPLLwHVtcHrAlZvx6Hwhtyj9KkWzOwIvB4+n8sTbiYd/KHm34/pIGxjRrLnxafcvBovQ0APGzVFI2e7vDDzeovFmz/itSgT+/EzU2U/UeKOg2VfmUjSZgvEfAVfB0ippPMhWvaLiantG8habSkiZIm4q6zhfumxbuntxQfZ2bXmtnTrRW5MmZ2B95rOaiBw48CrpHUqmCUmwPXm9l/asz/GD5m9wJuvlgCHxO6DbiqlXHjUpihw4ATLD3RJTgHf/kfwRsmE5o87YFmdlaJ9AnA6pKmriDvwnh9XNekDLVyB97YATc9fQzUuvBfOf4ODE0LAjYT5qijmNnb+Mq8u7a67HRfHwC+AlaxFBi2Dh7EGxDDWi1bB1gXf/a7uBjYLkXp3wlX2o1yGLCOpHVryl2j1psMLN9BDb4VBVo2VUrJ2fAFeSYWbW8C3Jp1a6RIpoVw82LNLqi4Yn4f78VMpgXeQfgE3I3rPGZVYHhRWl98/OMCWtRSxN34n6NK7x3vPY2jzZE+Ul2VdR0HjgN+28FnaAvcW3FmfJxj6RaVezNuWfiIHLuYl5B7lfS8tCpSRT/cW/QdfAym4eca9147nxJm/zz/8NBUWxalTcLjlH5EkSNVA+WPwnulVe9ZrbH4DgNuljRFkEZJm0i6t8QxzTAZ9zLrYm7c2aEeRuKtq9xgHoNtLL6qaK1sAEwws0fx8YaDJP1a0uKSBtUrg6Q58C78zfUcZ2b3mtkDRWlf4XHRFgd+0+xicqn3dATu3v1VFXk+NLONzAOstpMb8fh6U5CudxfcmahT3IUr8WOBy83ssRaVeyeu/P5nZh+0qMxOcD/eu6+tRV4GSVNJGo33zPfAI2+caemL2iBn4c5Tb0q6QNIGNcZTLJatr6RJkq6WtENa8qMlSFqwMDiypKnwxuhtRVkvxs1z48zjRTbDX4H/UkvQ3zq03sq4khiDm3l2wz3WnqPF82T4xh18fr5xkqg4KY+CHhQ+sPcqsFjWrZEScs6K96IWqDH/lcAuBduz4e6fz+Br73xAGuCusbz9gL+04Zruw81cDY1JpXt2Ct6yqsmdt0P3a2ng+TL7lgH+mYFMT6RnqGU9Hbzx8zndwMW8hOx7UCKuYh3HL4A3im8EVmuDfHPiDluPUcUSVOb4tdN7sQvusfoJbgJvVq4ZgP8DjitIW7PUM5Cu4X+0aCI6sH76hpWceP51vjoLXRJvrXyMR1DYmhZ1rUucayTunfUCRX74ZfIXKqi+eKs3lwOUwM+pbVZ6fwomj5bYL3xC3fvA0BrPfTuwaRuuaQDubPAcsFSdx/bFx5Xuo00D3k1cl/CG2UIl9v2knsZBC2U6iBbHe8O9Gb+gQjDUvP74xjN3aIPH7w+c0wE5Z8K944bXedzZFLh142b/D5v99uKhzG7BOxo/SmnHUmYCdGrEtMRxLr1Xd1HQ+C6Zr8bCZsNDW3yAe8bdiIcPyk04D1o8r6nNsk6bPnr7U2GeDR7mZ1IN5Z1S2AqqkG8B3IbcNps4bvJ7FzcLT1dD/gF4VO0JNGnbbuM1nUuJeUakcZus5Wvhdd5BN3ExLyH7ydQYuqzEsRfjK+R2Qs7t8Z5UTSHQ0vvxPlMG9H2OJkM9ped3K3w45RV87G0iJaZ3tKkuVsO9g8t6E9c6ZvACMB/u6rw+3jt5CHhQ0hI1lhEkzOxfeOSGVYH7Jb0s6cwS3mIj8AHLapwJ7J7sx98i2a9HSLoa93o7ztICje3AzC7BH7xlgRck/ayUzVxSP0nb4J5SA3CnjWZt2+1iPD4W+DXpXq2Mf9R7Csfh5uPuyBn4OzCwgWOH489hJxiLT9M4uMb86wJPm9mrRen34Q4iDZHWtloRn9f6Gv58n4BHIb+/0XLrwXxV4mfxmH1lM9Wi6UqOl+AhYt4F1u+Exq0iY7fpQRXJLdxN+QaKuta43XnVGsuZQNH8FXxi57N4ENPRtCi0Ux3Xthju4fchPgh/WnpmfoSH5rkb9xxri5m4hdcxE273H1iQti5lAmnGL7P7NIE6TZ98E6W8Y88gPgH2PWqLUvIXSnjupvfovCZk2I+iiCF4NP9DO3zPhuHjfyXHnVtxgnWAdzp5UWXk6JYKqkD+OZKy/07a7ppMW3EQseD4URTEScSjMYynQbNHi69tCD7QexBuLrsQWClrueq8hvsoWNwSD1Z6ZNZyxe9b92hb3OO1nmM2pomJp03IejhuHSk7To5HbSg5Bg0sRUGQ3wbOfw9Fa4RleN9+QJnoEk0v+W5mt+AmnaAJzOxN3P387OS+3OVe/mWNRVwPzCdpmbR9GB7/qh6X9rZgZu+Z2a1m9jsz293MdjazB7OWq06KzXzFkxmD7PkrsKykoXUc00nzXiEn4Z5xO1bIMxJveL9dYt8/gNklDSmxryKS5sWtG3VNNWkXZnaOmX1cal/TCiqd4NlWlBNwNj5zfW9qH38CICmyPwH7SFoNd2vdtg4FF1TmawWV7Pfz4eOwQU4wH1sdS32RJYbToTGXQszsv7icJ0mas0y2bfGYl6WO/wqPVjG8gdNvg68YXWskmcxQ6mJ1eyRNNLPuGFbkWySnkztxF/PFzOytOo6dA3cZ/QyPLVezggsqk8K8vI07f6yOz7Qfla1UQTHJgnAtPm5ecbJ3uqcf4mNB73dCvhIyHIWPw2xsBR9jSdPhYzPzW5mJ0+nYAWb20zrP+Qjutl48GTd3tKQHFbQOM3sKn7H9XD3KKR37JnANPngayqmFpI/dzfgEwzDv5RTziCvv4WOe1VgCeDMr5ZQ4Fh9v3qUofSvgrnLKKVHVk09SH0nzdcXLlLQovgTQnY2L3DnqDrsRdISj8ZWAG+EH1lO6xfljPO51OByP8Bzkk3PwgfdqYyxZjT99jZn9R9KuwARJd+KedHvjynP7Koc/CAyT1D+ZDL+
FpO/h35GhwGBJb+Ehhq6o1rvMC9GDyiFm9j8z+7DBY0M5tY+b8fWn/oPPDQzyySXA+pJmrpIvcwUFYB5P8VR88u3e+LzGeauZ4JJjwUt4OK6vkTSvpEvxejgZn24yLbAGHhbq6FZfQ7sIBRUENZJMrv/AvSujIZBTzOwjPNJ9JQ85yImCShyLj5utZWaX1+HA8C0zn6TF8HmPz+Br3l1izn/N7CUzu72K2TBXhIIKgvo4Fve2DPLNn4CDy0X+Tunz4YF3M8fMvrIpo0XUwtcKKl3TtcDhZnaUecSabk0oqCCoAzO70swezlqOoDLmYXTGAWcVLidRwAp4nMvuPg3jfmDl5JF4Cb4S+XkZy9QyQkEFQdBTOQRYFB93KSZP5r1meA4fXzoHj0j/42zFaS2hoIIg6JGkibvbAseWCGrdIxRUGgu9D3eA2LqUN193JibqBkHQo5H0AzwO5P74JOs18QnXi5UJI9StkLQU8Ln5it09ilBQQRD0aNIY1B/x5SVuT797ysV/C/JDTNQNgqBHk8xg+2QtR1A/MQYVBEEQ5JJQUEEQBEEuCQUVBEEQ5JJQUEEQBEEuyZ2CknSUpNclPZp+I7OWKQiCIOg8efXiO8XMTsxaiCAIgiA7cteDCoIgCALIr4LaX9Ljks6VNGO5TJJGS5ooaSIehyoIgiDoIWQSSULSLfiyw8WMweNjvQcYcAwwh5nt3kHxgiAIghyQ61BHkoYC48zsOxmLEgRBEHSY3Jn4JM1RsDkKeDIrWYIgCILsyKMX3wmSlsFNfC8De2UrThAEQZAFuTbxBUEQBL2X3Jn4giAIggBCQQVBEAQ5JRRUEARBkEtCQQVBEAS5JBRUEARBkEtCQQVBEAS5JBRUEARBkEtCQQVBEAS5JBRUEARBkEtCQQVBEAS5JBRUEARBkEtCQQVBEAS5JBRUEARBkEtCQQVBEAS5JBRUEARBkEt6jIKSND5rGYIgCILW0WMUFDAkawGCIAiC1tGTFFQQBD0YSUtLOiVrOYLOEQoqCILuwp7ALpKUtSBBZwgFFQRB7pHUD9ga6AfMn7E4QYfol7UA5ZD0MvAp8BXwpZkNy1aiIAgyZG3gReAtYPn0f9DDya2CSqxpZu9lLUQQBJmzPTAWGAwMA67IVpygE4SJLwiCXCNpamAT4HJgIt6DCnoBeVZQBtwsaZKk0aUySBotaaKkiYSbeRD0VDYCJprZm8AkYPlwlOgdyMyylqEkkuY0szckzQpMAA4ws7sq5J8Y41RB0POQdDVwvZmdl7ZfBdYys+ezlSxoN7ntQZnZG+nvO8A1wIrZShQEQaeRNAPuIHF1QfIkwszXK8ilgpI0raTpuv4H1gOezFaqIAgyYHPgFjP7uCBtIu4oEfRw8urFNxtwTTIz9wMuMbOItRcEPRhJcwHnAgsB/wSeBdYEflmUdRJwaGelC7Igt2NQ9RJjUEHQfZE0AldOp+Mu5IsAiwJzAGPM7IuCvLMAzwMzmtn/MhA36BB57UEFQdALSBEijgW2A7YpcIR6Fri+1DFm9q6kD/mmpxX0UHI5BhUEQWnk7COppzgJHIM7QC1byUu3BFM4SoTrec8jFFQQdBMkDQIuw3scB2QsTlkkzSJphxryrQ7sAmzdQMSYbzlKSFoMeEXSUnWWE+SYUFBB0A2QtDDwAB6fciVgpKS+HZZhTklLSVpD0qgkUykGA7+Q9LtkwitV1gzAhcAeaSpJvXzdg5I0M24OfAo4roGygpwSCioIcoykgZIOAO4FTsU/6P8E3gSGd1COdYDngEtws9wulIkqbmYvJNkWB25IyqiY04EbzOzGBkWaBCwnaSBwFT5XclNgcUnfb7DMIGeEk0RQktTyncvMXslalixJvZQ+ZvbfDp93IL7+0eG4OWtdM3usIMt1wMa44uoEKwOnmdlhtWQ2sw8lbQicBDwg6dfAZOB1YBVgOZqYbGtm70t6H7gW+Bg4wsy+kjQGOEHSStYBF2VJQ4AvzOyzdp+rNxI9qKAcOwGPSpona0EyZgTfjmLQFiStIOlHks6UdDv+MV8H2MTMNi1STuAmrU3aLVcBy+O9lpoxsy/N7EDgV8BI4CjgBtwMt4OZfd6kTBOB2VNZX6W0y4C+wFZNll0rvwcmpIC230LSvJJGdUiOHknMgwpKIulKYB7gCzzu2VdVDumRSLoWuM7MzmnzefYElsLdq58FnjazyRXy98F7I6t1IiadpMnA6maWm3WYJM0LfF7sYJHMkWcCi7e75yvpReA14G1g2655WZIWBG4FZgIWNrO32ylHTyUUVDAFkvoD7wJLABcBt5rZr7OVqvOkyAZPAPPm0YQj6WzgKTM7pc3nmR13QJi5E2azViDpJrxhcXobzzEr3piYEw9ofZeZ/TR5FN6Cj9UtA3xgZmPaJUdPJkx8QSlWBZ5LAXt3Bn4oqWMD8jliN+DyPCqnxHV0xsy3PDCpuyinxBjg0DZ7Oq4EPGhm/wZGAVtL+hVwGx794k/AicBeXbFFg/oIBRWUYkPgRoBkZtoHuFjS4Eyl6iDJhLYHcHbWslTgVnxtpBnbfJ7l8fGeboOZTQTeB9Zq42mGAw+m872Lvzc7Aweb2QUp/QX8PpVc0y6oTFkFJamvpM0lbVY4l0FSpwYfM0HSXJIO6OWz0kfig9kAmNnVwN+Acb1ISa0LvG9mdTkGdJLkZHAH7sjRToZRp4NETjgX2L2N5Q/H56YBYGbPAvOZ2WVF+Y4HDpY0oI2y9Egq9aAuxF1BlwHukbRQSt+n7VJly8bA74Dz01hMr0LSUGAWpmwx/xD4B3BrmhjZlX8aSUdLOqtjQnaGPcl376mLmr35UoSHyZJul3RwwTtdjW7Xg0pcAoyQNFOrC06mw2HAQ4XppcygZvYI8DRQMbpGb/zeVKOSgprTzH5mZkfhgRz/3EsmwC2D26+HANem9ajajqTF68h7eBqgbQcjgPHFUaLT9r64ueKuFFVgW+AZPOr0WpLWbZNMUyCpn6Qr2vTxmQ1fJO+SVpfdBsYB60uau4a8xwN/BU7GHWDukfSQpFXKHSBpTmAg0O3mw5nZB3jPf7s2FL848LaZvV9j/t8AhyXT8RSkMd7bWiVcj8HMSv6A+4CpCranw1+Gd8odk+UPmNiicu4HVgP6A+fhNuYhZfJOB3wHf1gXBRYGBjdwzs0AA5aqIe/cwH9x76F521CP4/Co0pXyHA58DjyCuzmDLyz3GNC3Q/d7ZKqzfdtQ9mHAOZ24jhbJezjwHr5G0oAyeVbB3dIHF6T1AbbH51xdAMxe4riNgZuyvsYm6mZd4JE2lLsHcGEd+YX3Qjcrs38q4CNglqzrrAP3ZCV8nuU26buxETCwZN4KhQwvfmDxCXA7Zn2BZeRtWkGl6/sMmD5tC/g1sH6Z/N/DzV5PJ4XxPB74sp5zzg+8A9wMHFVD/u3wVvCBeKt2sRbW4dTAJ/g6O9XyLlKojFJd3QPs2qH7fVmqhwdbUNaewJWpvHHpYz+8xfIumBRIvzbVx0L4uOHT+Ly1wn39gEeB7cocOx1wQrrudYr2HQUc24l72qZ66Z
Pek2VbXO7ZwH51HrMCsGCF/VcBO2ddZx24Jzvi01cuw0NUjQNmKpm3xgLnyvqiapCxFQpqUeDFDso8FfAwcBDewn2ihmNOB36c/t8Fj8m2XIvk2QCfy9Ho8cPx1vg0ba63GfHwNkOAN/AJmc2UtwIeeWAzvDXXEuUEzAwcjI9TvA2cRmr8tKlehMejezV9QLsaWgfg5iNVOX4j3GTbryBtHLB5O+9nu39Jyf6hxWU+ASzf4jK7pjVkXmdNXse0uNt9n6bLqvGEk1t9M9pQKa1QUFsDV3dQ5j+kFoRSS+9NYJEqxzwOrFSwvQVuupm5BfKcChzeZBmX4XNA2llve3e9yHjL/zedumd1ytnVK1uPNvWcypx3MB5J4TXc7fldYIkajhNwOx6Qtmv7TdwzLfP6bKI+huK9w5JmpAbr9zOgf4vlnA34kDJm2jz/gEG4E8jVqfF4My0wV9Z68h3wOQVblti3CXBvDiqoFQrqWODIDsm7JfAiBeY04I+VFATec/i0+MXAB70vr9ZCriLPHOmDVnUcrEo5C6SPQdt63bhr78j0/5KpAdWRsa86ZByYXtSmGw5NyLAm8EI9ChxfPHAyMA0wV1JuDT9XefnhkR12b1FZawH3tEnOByky0eb9lxoyt6Y63pUy5rpGfjVN1DWzi/Hu/x8kjZHUX9Jukp7CoxVfXEs53YBlcFt9J5gad0b4sCDtKnzQsByrAA/ZlPHFxuDOGts2IoikjYG/42ahJxopowvzWG2/BZ6UdK6klVo5pyyFkZkPb6FhZv8A3sK97vLEWsBjVruXV8sxs9vxscIj6jjmIdxR6Id0zwgS5TgMOF7Ski0o61vzn1rMOPxb253YCp+asoGZnW/uPdka6tSUS+Jd249xj5StaYGdsUVavBU9qNeBoRleQ3+891HSOw+PAn10mX3L484WZXsueEtnFjyA5XS4qeJU4GVg1RZfy6y4U8Dz+DjbDC0q9zjgt0VpBwAXZ3Xfysh5BvCTrOVoUPZF0nN4GvCrrOVp4XXtDPyz2WcRDzE1hTWpRTIui6+71S16rbhp7zU8kHDLy6+pByVpNkm/Ae7G15+5F/g/4HYrmi/THUit+nmK0mbBB/cym+9h3jO6Hh9gLMVq+D0odewk/INybqkei6RF8S748+n3Bq7QZgaWMbOWritkZu+Y2Qn4x+6fuNdhU6TJkTvhLtGFjAU2kjR9s+doBan+N8Y/ZN0O8wURr8TnvXXHCBIlMbMLgfF42K6Gwryle7sSKcRRG3gUN68u0qbyW80Y4E4zu6sdhdd6k17AzSprm9n6eMyph4AHJS3RDsGapcrExZNwz55ClgYetdQsyJCSZr60gN2yVDYtHIePU92WIgUsllZkPRpvVFyPj4nMZGbTmdlAM9vezD5qw3UAX0/wPRI4oNTKqpKOk1RrnLK1gTfN7Mmic7yH28DzEoZrWXwZiGezFqQJjsbHnR+qlrGb8WO81X9Ug8cPBb7Ex+laTvr+VDTzSdpA0quSHpM0XtL5kr7TDnkqkRq9e+KWkvZQYzdugTLpe+GDqCXnCXW4qzmx4P+++Ms1d4l80+Fmyg+BQQXphwC/z8F1DMQn7M1WlL4a8HANxw/CFdxZeNf7c+CKUnXR4es6nyIHFDxqxSt4b25khWPnwT+Yb1BmoBt3rx6X9f1LshwFnJi1HC24jpZ6qeXlh5ufXwdWbuDY04DT2yzfRrh1qtS+GXDluCneEBqBO1d11MSNDxfcRJry0rbztEDQdchBdAmKxqDwMDWjS+TbEJ8Tch2wW0H6RYXbGV/LpcWyAz8FTmngIZoiOkBG17QQPq4xQ9qeMSnQtfDlxN8Bli46ZmW81/c+Plb23Qrl9yMn7rm4WWyNrOWIX8V7tAvuDFLzWA8+zvsWbfbMxE18n1BirAwPgHt6UdrsqVHbEjf6CnJtia8gfBM+1+7Rdjdiml5uw8xuwVv3eeMGPBxOMWvj5qDiSMdL0zkPvmpcAexfFDn8e5QZfyqHOW+1VLIGMV/19Xp8UjL4g36tmd1mZvfjjg7XS5pD0iKSrsLnVF0HzGNmB5hZWQ9D8+XF/9Pmy6hKMi0PxU2qQX75Cz5RviazcBr/PAM4wtrsmWkepf4u3Cz+9Tda0gh86sBhRfnfwr9d67VLJkkr4fM2X8Hf3e/jc2PbumJx5i2ZFmr34h7UENzbcKqi9MdwN9H++Oz+RXCz2r+L82Z4LcK77ffgJru+lDD7dbcf3/SidsEdNaYt2v8zvGX2Lv4STp21zA1c4z7ARVnLEb+a7tWa+FzEqj0PfDjjHjrktYzH93wIV1RLAtOnd2OdMvn3B/7SJlmEN7g6bmHqsQsWmg+c/wNYvSstRQCfD1dm/8VbUbviD8BzZvZ/GYg6BeZPxf6499t1eCiet83s7UwFaxL7phd1Lh6z719FWX6NK6bFzex485VKuxvd1nuvt2E+T+xJvPdelvTdOAYPTNwRr2Uzexo3cY/F1/y6HfibucWqFFfjnqxTtUGczXEP5wvbUHZFeqyCStyIjzl1sRbuEvll2j4Pb80vT37Me8DX3m974qFmxuGtt57Az/CAmFNcjzljU+OiWyBp1TRt4TtpyZTVcBt90D04FF8GY0iFPCfgvZPHOyQTAGb2lZmdAXwXH7L4SYW8b+DKtqVL3qRFFo8HDjGzr1pZdk3nT124bo+kiWY2rChtWeAyM1skbZ+NB2T9Q0GeB3DPmLPN7KROylwLaTXjM4BrzOzGrOUJvo2kcbhX2LT44PY9ZrZTtlIF9SDpVNzMP9qKPoiSdscjcSxnZp9mIV+tSDoQj9q+awvLPBg3K25YNXMb6OkKSrg76Rpm9pykF4GNzcPjdOUZDfwJvwm3dlToIAgyJy16eQduqRjTpaQkrYoHc17dzJ7JTsLaSA46j+Oeu007DKV6eQb4vpk91Wx5jdCjTXzpQbsRGClpATz+XXFFX4a7MefKxBcEQWcwjx23Fu71e7yceXFv2p27g3ICMLPJuEKpOS6lpEHpp7TdR9Kykg7D411elZVyAp870tO5Afes+hy4rbgLb2YfS5rD2u0uGQRBbjGz9yStDUwAfgesgU+2Hp+tZHVzBe46/7dqGSXNj8/ZGwj0kfQBMAD3tJ2AT47PdDy1R5v4Uvp0eASCO/C1ns7rtGxBEHQPJM2If5SfwNfF6lYfyNTzexRfmWDG9HsZOKzQAzHNr7oFGG9mJ0iaGo/L+b/kcJELeryCSvtuwbu985nZq52VLAiC7kSalPu/7qacupC0L75SwYfpty8+//OggvG1fXAP5lWz8M6rld5g4gMfhwrlFARBVfL8wa4FM/tj4bakm/EoND8GTpQ0FJ/XtVrer7W3KKgLCCeIIAh6IWb2UQqTdJ+kN/EQb79Nk4FzTa8w8QVBEPR20pIcd+ELIq5aELAgt/SWHlQQBEGvxsyelLQ68FF3UE4QCioIgqDXYEWLfeadHj1RNwiCIOi+hIIKgiAIckkoqCAIgiCXhIIKgiAIcknuFJSkoyS9LunR9Cu1bHsQBEHQw8mrF98pZnZi1kIEQRAE2
ZG7HlQQBEEQQH4V1P6SHpd0boouXBJJoyVNlDQRDxkfBEEQ9BAyCXWUoovPXmLXGOABfD0SwwMazmFmu3dQvCAIgiAH5DoWX4q6O87MvpOxKEEQBEGHyZ2JT9IcBZujgG4VmiMIgiBoDXn04jtB0jK4ie9lYK9sxQmCIAiyINcmvmaR9CTwRdZy5Jwh+JhfUJ6oo9qIeqpO1FFp3jOzDYoT89iDaiVfxBpRlYl1tKoTdVQbUU/ViTqqj9yNQQVBEAQBhIIKgiAIckpPV1BnZS1ANyDqqDpRR7UR9VSdqKM66NFOEkEQBEH3paf3oIIgCIJuSiioIAiCIJf0SAUlaQNJz0p6XtLhWcuTByTNI+l2SU9L+oekA1P6TJImSHou/S0bnLe3IKmvpL9LGpe2o46KkDSDpCslPZOeqZWjnr6NpIPTu/akpLGSBkYd1UePU1CS+gKnAyOAJYDtJC2RrVS54Evgx2a2ODAc2C/Vy+HArWa2MHBr2u7tHAg8XbAddTQlvwfGm9liwNJ4fUU9JSTNBfwQGJZiifYFtiXqqC56nIICVgSeN7MXzew/wKXAphnLlDlm9qaZPZL+/xT/oMyF180FKdsFwGbZSJgPJM0NbAj8uSA56qgASYOB1YFzAMzsP2b2EVFPxfQDppbUD5gGeIOoo7roiQpqLuC1gu3JKS1IpCjxywIPArOZ2ZvgSgyYNTvJcsHvgEOB/xWkRR19m3mGzwAAAzJJREFUmwWAd4Hzkin0z5KmJerpa8zsdeBE4FXgTeBjM7uZqKO66IkKSiXSwpc+IWkQcBVwkJl9krU8eULSRsA7ZjYpa1lyTj9gOeAMM1sW+BdhqvoWaWxpU2B+YE5gWkk7ZitV96MnKqjJwDwF23PjXetej6T+uHK62MyuTslvdy1xkv6+k5V8OWBVYBNJL+Om4bUkXUTUUTGTgclm9mDavhJXWFFP37AO8JKZvWtm/wWuBlYh6qgueqKCehhYWNL8kgbgA5PXZSxT5kgSPmbwtJmdXLDrOmCX9P8uwLWdli0vmNkRZja3mQ3Fn5vbzGxHoo6+hZm9BbwmadGUtDbwFFFPhbwKDJc0TXr31sbHfaOO6qBHRpKQNBIfS+gLnGtmv85YpMyR9D3gbuAJvhlf+Sk+DnU5MC/+Um1lZh9kImSOkPR94BAz20jSzEQdfYu0ZtufgQHAi8BueIM36ikh6WhgG9yD9u/AHsAgoo5qpkcqqCAIgqD70xNNfEEQBEEPIBRUEARBkEtCQQVBEAS5JBRUEARBkEtCQQVBEAS5JBRUEOQESfNK+iwFPA6CXk8oqCDIEEkvS1oHwMxeNbNBZvZV1nIFQR4IBRUEQRDkklBQQZARkv6CRxS4Ppn2DpVkaXkGJN0h6VeS7kv7r5c0s6SLJX0i6eEUmb6rvMXSIngfpAU7t87myoKgNYSCCoKMMLOd8HA3G5tZVwicYrYFdsKXjFkQuB84D5gJj+12JEBa7mICcAm+hMN2wB8lLdnmywiCthEKKgjyzXlm9oKZfQz8DXjBzG4xsy+BK/B1vQA2Al42s/PM7Mu0OOVVwJbZiB0EzdMvawGCIKjI2wX//7vE9qD0/3zASpI+KtjfD/hLe8ULgvYRCioIsqVV0ZpfA+40s3VbVF4QZE6Y+IIgW97Gl1BvlnHAIpJ2ktQ//VaQtHgLyg6CTAgFFQTZchzws2Saa3i8yMw+BdbDnSreAN4CjgemaoWQQZAFsR5UEARBkEuiBxUEQRDkklBQQRAEQS4JBRUEQRDkklBQQRAEQS4JBRUEQRDkklBQQRAEQS4JBRUEQRDkklBQQRAEQS75fwFP7nBsShUcAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "np.random.seed(1)\n", + "data = np.random.randn(100, 3)\n", + "for t in range(1, 100):\n", + " data[t, 0] += 0.7*data[t-1, 0] \n", + " data[t, 1] += 0.6*data[t-1, 1] + 0.6*data[t-1,0]\n", + " data[t, 2] += 0.5*data[t-1, 2] + 0.6*data[t-1,1]\n", + "# Randomly mark 10% of values as missing values in variable 2\n", + "data[np.random.permutation(100)[:10], 2] = 999.\n", + "\n", + "# Initialize dataframe object, specify time axis and variable names\n", + "var_names = [r'$X^0$', r'$X^1$', r'$X^2$', r'$X^3$']\n", + "dataframe = pp.DataFrame(data, \n", + " datatime = np.arange(len(data)), \n", + " var_names=var_names,\n", + " missing_flag=999.)\n", + "\n", + "tp.plot_timeseries(dataframe); plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -621,38 +659,9 @@ " Variable $X^2$ has 1 link(s):\n", " ($X^1$ -1): pval = 0.00000 | val = 0.606\n" ] - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO2dd7hdRdWH318aAUJoobfQm1IDBBCQTkINvTchdAFFilEBURCkqIAgSBUIHYGAgdB7SZAmRTqE3ovIp+D6/lhz4XByetv73rve5znPvXv27Nlrzy5rZs2aNTIzgiAIgiBv9MlagCAIgiAoRSioIAiCIJeEggqCIAhySSioIAiCIJeEggqCIAhySSioIAiCIJf0y1qAckh6GfgU+Ar40syGZStREARB0Elyq6ASa5rZe1kLEQRBEHSeMPEFQRAEuSTPCsqAmyVNkjS6VAZJoyVNTL8nOyxfEARB0EaU11BHkuY0szckzQpMAA4ws7sq5J8Y41RBEAQ9h9z2oMzsjfT3HeAaYMVsJQqCIAg6SS4VlKRpJU3X9T+wHhAmvCAIgl5EXr34ZgOukQQu4yVmNj5bkYIgCIJOkksFZWYvAktnLUcQBEGQHbk08QVBEARBKKggCIIgl4SCCoIgCHJJKKggCIIySOor6XZJc2YtS28kFFQQBEF5VgS+D2yTsRy9klBQQRAE5dkEeBDYLmtBeiOhoIIgCMqzMXAIMJ+khbIWprcRCioIWkQarxiQtRxBa5C0ADALcB9wBWHm6zihoIKgQeT8XNKNkp4FPgduy0COWTp9zl7CxsANZvY/YCxh5us4oaCCoHHWA3YAzgQ2B2YFviNptk4JIGkQ8KqkrVpYpiStkJTvDK0qt8y5ZpC0VjvP0QQbA9el/+8HBkv6boby9DpCQQVBA8gDRR4FHGlm15nZP8zsY3xpmBEdFGUd4A3g1LQ0TcNIGiRpH+AR4FJgOB4Tc6rmxSzLXsCFqT5zg6TpgZXw+0nqRV1K9KI6SiioIGiM9YDBwJVF6eOAjToox0bAqcAFwB+b/ND/ERgFHAosjHuwfQCcK6nl34ok6054z3PhVpffJBsAd5vZvwrSxgLb5k2Z9mRCQQVBnRT0nn5pZl8V7f4bsE4nnCWS0tgQuB44ElicBgfyU1kjgD3MbIKZ/S9d247A/MCvWyP1t1gKGARcBuTNzFdo3uviUeC/xNp0HaNXKihJW0haOWs5gm5Lud5T1wKbTwOrd0CO5YGPzOwFM/sC2BX4vaTZGyhrOeA9M3u1MNHM/o33pLaQtHezAhexI3AxcAs5UlCS+uHKelxhuvny42OB3aIX1Rl6pIKS1E/SuaVs5+nBOhG3rc/VeemC7kyV3lMXnTLzbUTBR9TMHgbOBk5poKz1gZtK7TCz94CRwC8ltaT3IKkvsD2uoG4H1myHGbFBVgVeMbPJJfadA3wPuFvS9zorVu8jLw9ESzGzL4GhwKYldg8H/g2cBlwmqX+rzitpbUnzt6q8IJeU7T0VMA7YuAOt7G8pqMTxwIgGHCbWo4yCAjCz54F9gEtb5Nn3feAtM3sq9do+BpasdICkWSSt1oJzV2NT3Gw6BWb2Gr5W3VnARZJuiAm87aNHKqjEn4EflEjfHrgEOBZ/KX7TipNJWg+4ETiuFeUFuWVv4MQKvSeAx4EBwGLtEiL1/ucH7i1MN7NPgWvx57zWsgbjJr67KuUzs6vwZ/zPLVC+OwEXFWzfRgUzn6R5gXuAq5IJri2kBmtXz64kZvaVmV0ILArcgfemYlyqDfRkBXUNMEzSfF0J6cHeGhib3EZ3AjaXtEUzJ5K0PP5AjwLWTS9TUCfJNHtWXieepo/XmvhHuixprKIlZj5J00o6QdIESdMV7NoQGJ+sBcWcj49H1cqawINFHmvlOARYAO9NVUXSypKuk/Q7SdOktGnwXsrYgqxlFZSkRYC78flmL5XL1yJGAC+Y2T+rZTSz/zOz3wKjgRskbdBGuXolPVZBpcHdscBuBclrAy+Z2QspzwfAVsAZkpZr5Dype389sKeZ3Yi7+x7QjOy9mB2AXYATshakDCviz8/bNeRtWkFJGgE8CcwJvA5cXTCuWsq818WdwAySlqnxVGXHn4pJzhjbAEdLWqqC7CtLGo+/gzcCswGT0nu2Ca4Q3yo45HZg9TQ2VVjOMngv5WgzOwWfi7RtjdfVCLsD59ZzgJldjyvcCyTt1BapOkAK1dWxSeY1YWY94gdMLJG2DPAK0Ddtnw8cWCLfKOAtYKk6zzkEeAEYXZA2FHgfmC7rOulOP6Af8Bzu3vsasHodx/bBP7JH4q3ZDfFxgv4tlvFo4Pga804NfALM1MB5huAf9heA9Qrq52r8Az1ttbKBXwK/q/F8LzTw7P8IuKjMvpHAm+leDChI3w54B3gZ2LHEcU8Cwwq250/v5ZYFaXPhc7OmasMzOCvwUaPvLrAE3pBYs9WydeKXnpmXgD4l9vXBG+L
n443IeTsiUwVhBwL7pRdlPD5usx8wddYVWUbeKRRUSp+UPl5TAx8Cc5TJtw0+I3/xOs55AnBmifQrgB9mXSdN1ud3gTEdPN9OwJ3p/y2AfxR+3MocMwfw0/RSPYKP//0Zn4v0LB6eZs4Kxy8JHAM8BexSg4z3A2vXcU3XUdB4Kdq3Zfog7wz0K0jfMH3kTgKmKTpmIN6buL+rriqce8GkDKrV4ULpuVed92vm9DGfscS+8cDOZY6bB/gDMKjEvj8AhxZc6yTgoBL57gQ2acMz+CPg/CbL2BBX+NO2Wr52/vCoGW8l2VctsX+N9J7sA1yenq330u8j4FNgr5bLVUHgy/EW0LzA9OnvnsAVWVdmGXnLKah907VsCUyoUsZOwGRg4RrONz3eU5qvxL6V043um3W9NFGfVwD/AYZ04Fx9k0JZO20LNwsdVuGY7wDv4t5Uy5fY3wdXXq8D3ytIH4KPozyJ99ROAg4H7qgi44zpJRxYx3Utn17kBYrS50npo3HHhGfTs3cWrmzXqPLcPUIJS0CJvHcBm1XJs1+jH2W8N7dfUdoC6b7UXE8Fx26Gj6sBnJGewSkUZ/pIXtLiZ1DpmShb93WUdRFwcivla+cPmCY9g1sCY4A/lshzDnBIwXYf3Gw7CzBT+ua9TpUGUYly++Hj97OX3F/hwLvqSW9DpW2QKu154PAa8pdTUDPgGv4OYLcaytkb98KaoptblO9Qypg40v4HgFFZP3xFMs1DbT2FBfGW0TW1fAhbINcOuIeWCtIWSDIMLXNP/wnsVEPZI4C3gV+kD8dH+Djhal33GO9df1TuJUl5tgD+1sC1HQg83PXi4sr4DuCItC18bPQ2fA7T4BrK7Fvt+Uz5dgeuKdgegk+Onbsg7Vpguwbv2zp4dIXC+3Y87uXYSHkz4qbL3dL9LVkXfGOKa1kvBVgBb1TW1ZMsU9YQ3MQ5vFXytfOHh8q6KP3f1cDoX7B/GipYnwryTQB2rfPch6fjStZ7pQN/hZv1dsIHNbvcQo/tQIX1TQ/LAri77mPAElWOKamg0r6/AF8A09dwbqUPypYV8kyFm0XK2u1xk2FHlHkd9Xoh8CVpXKNCvlNxN/y1Ut03/dJWudfPAOuW2DcGN2cNLUjrg5vOTq3jHAvirfGDKDNuk571fSuU8Sfg4AauT0kJnJS2D8NNVG3vXQPTpQ/5KNxU/xHeM/0AtyqshU+1mKXB8vvgPb7l0/ZUeM+wqgWiQpmT8J7qd6vkuwnYuoV1dQbwsxaWtw1uEmv5WFmLn5F1cUvCjAVp9wEbFmxvTw2Ns1TWk7V+L/Axu5KN0K/zVClgSbxHcUT6+50OVdrKwE0F20eQWpwVjqmkoJbFvYBqPf9GVOhF4fOrKt4wvOv6FLB31g9hkmdRvGU0Cu+Kl/wo4WMLH+DjO10foOXaKNcO6YUoZcrph7ew3sM9I/vgPaF7qNOUUIMco4Dby+wTPrC/ZINlz4Q764xJH/CODDCnc5+VnuUDuj5C+ETj/fGGwQNNlv9z4IyCe3lzk+XtRoXGYVG+q1tUR9Pg5vp5WljvAv4K/LbWD3anf/g43ysUNVhxs+/FBdvjqaGXna7578DIGvL2xa1MZRuFZlUUVEFhc3W44rYE/lywvRNwWol8o4GJ6fdyix+uicAWJfb1SS92VU8dfAD6LUr0Djr9w3uRY9L/v8E9ckophTHAuQXbR1FHb6VOmdZJSnOVKvkWTUrp77hyrWhqaFCWLjPfbCX2LYyPTTb8ocHD53wJbJv1s1Agk2jS6Qk3G3+QPvL30CGzNm7m/ZgarCIVylgSD3v2VuEz30IZZ8ctEOc3W89tqsMDgetKpHeZUAfhUxw+oMhhp0KZO1BlPDflOwSfWlB5KKXGk06mxEB0GytuqxIKquJHkgo9qAZl2Cg9XH2K0jcDHqr1Y4UHDX2HKibKNtfnokmGwWl7AG7GLB7gHojbzpcsSJsf78G01FSR6vEdYLUa8/cB9qDADbkN9XQJsE+J9P1a8QFrh2LNww+4Ae8pvE6BR2IHznstcEADxw0Abk3yHgcs2kYZp8XNqxPpYM+5BrmmSe/6smX234ib9n5S+C2uodz+eK9shTL7+wHD0jdlwarl1XjSHfAu8BRdb3x86t4WV15LTXwNytDVi9q8IG0O3EZe1QRRVNbOwIvArJ18CAvOfzHw06K0hfHey+54T0+46fLGEsffBmxVsP1dfEyn0fkiO6aXo2ONnhrlGgXcViL9WnLU88nbD19N2ICjOnzeJdMzvHSdxx2SPsAd8bJN79aP0zNf1lxOB02BqQ6uqrB/x9TweJI65iSmYw9O34fFcFPsWcCDqUHwn/R395rKquOkK+OOAWOSltwNH2N5jip2xAYqr1/6oM/PN04SFe3/rVZQqcyNcS+locDpeFf3pEYebNzp5AFq8DzC1/X5DS2YaJoekndKKRM8QOjl+CDpu3gjZK0yD+vf0ou2T8r7XCMfbbw38hoZ9igryNZl5pu1IK0/bkpqu7t9d/2ld/Q2OjwUkM69A+7xV5OpD29kvgcskoGse1DG/IVHoniYDvRAcdPd21TwKUh5PqXMxN0q5U+XvhEv4VaJA3AT99z1Xl+9F7Yk8Fl6YSfice3qEr6Oc41MD94L1DBhtE0KSniP6UPcFNBwDyiVdR7ufVTRXJZu6mv44OQUExqL8vZLCm1wUXp/YBF8oLYWN/25ca+uUuNS0yTlfAM+B2eR9LLVPCcuXf8vkmKbvx3PTIvu+ViSYws+znEy8FDWcsWv4j07A48uX7UHgo8H1RQNpA1y9sOnzaxWIv0Z4FVq7Fk0KccRwKU11tXPG73Wlsha48lmw1v0H6QP7I14FOWG3FPbVOktV1AF1z7FbPlGbxo+t+hyyvTC8AnRH+DedOfgraqSihFv8V+P92z/lY77Oz5/7Iuk3K+qpuRqlP3o9LGeKm3PgjdUqg7+4uNHf0iyTeGEkKcfbq66G/cefDfdg5Z5d8WvLfdsIN5gPig9awNSo6p/Ub7huHkpszBkuBn95qK03fCpByvhDdOaHBIaPP/06blerIa8/WtR+m2trxov6rPUslw2bQtfFO1FcmKqaZeCaoOcA/EB2j+Vuvm4V9HJBfV8DN7rWK4o33S4F8wlXQ8SPkFwGD6/oO3zL3CzTrVIBYsnhXwXTXhcdfD+TI2bOi+ljYPn8Wv5fZsfN1sZPs7xOW75OC29D31SY6/q5O42yzkAn7IwPG1PhTsVrJK2r6RCBJUWnP8Y4C9Z36+a5a3xohYok75X0sbrZ34h3URBJVmnSy/LUUXpgykRPim1sCYnhbBhUkQP4aaNzMIp4XNpLiyRPhifN/cg3rs7jhy62Va4ro55osWvpfdNhY0+3Gz9S9w54Ql8vl1bhiTqlHNv4Ib0/4HA9QX7Fknf1LqDDNdw3q7QW3O3uux2/ZQEbxhJ6+BxsepdwbOlSJpoZsOylKEeJM2Of8B/YmaXp7SDgZXMbIrlBCQNwMf8foz3Sn6HezY2dwObIC2a9wQeIug/Ka0vPh/mHbyXeLOVXrMoCDpCenc2BR
41s+dyIM9UuPl9R7ynvr6ZPVaw/wzgX2Z2SAvPOTU+nn6MmY2tlj8vNK2gACQtambPtkCeZmToVgoKvl7rZgIed/AxfAB1KzN7uMIxwntYL3dEyCpIug/4pZmNT9sH4uM4a5ovChkEQRGSfoh79o4zs+2L9s2Bu3cvZ2avtOh8p+CTbrfNslFbLy1ZsDBr5dRdMbNHcTPpX3Fz2auVlFM6xvKinBJX4QoJSUPx0Dd7hnIKgoqcjYeg+kXxDjN7Ew9ovEcrTiRpLTz4wT7dSTlBD15Rt7tgZlfjprBTcC+57sbVwGaS+uHXcaLVsFx2EPRmzOzfZvY9M3u+TJYb8FBgTSFpenx6yx7mK4h3K1pi4ssD3dHE10Uy240CrjWzr7KWp14kPYLPj1oeWNHM/puxSEHQrZE0EHeWmNvMPq6St085i4Wkk4AZzOwHbRCz7fTLWoDAzXZ4T6S7chXuLRXKKQhagJl9Iel+4Pt4qK1KnCnp32Z2YGGipIXx5dmXbI+U7SdMfEErOBsffJ2UtSBB0IO4hSpmvmTC2xo3s29ctPsE3OT+dpvkazuhoIKmMbN3zOyKrOUIgh7GLfgigJXYGp/4vz1wdvIARNKawDL4dJRuSyioIAiCfPIoMETSPBXy7I4vBXMvcCZwQXJYOhmPSPFFB+RsG6GggiAIckhyfLgNWLvUfklLAPPh8VHB51VNC9yMx+bs9laNUFBBEAT5ZQLlx6F2w0ONfQmQ/u6IR5r5UXeb81SKcDMPgiDIKZLmB+7HV2O2gvT+eOTzNYoDJVRyO+9uRA8qCIIgp5jZS7i57jtFu0YAz5eK4tNTlBOEggqCIMg7pdzNdwfOzUCWjhIKKgiCIN98raAkTSNpJD6Bt9s7QVQjIkkEQRDkm9uAcyXdDqyAr0x9gJl9mq1Y7ScUVBAEQY4xs/fT8hxvA3f3BsXURSioIAiCnGNm52UtQxbEGFQQBEGQS3KnoCQdJel1SY+m38isZQqCIAg6T15NfKeY2YlZCxEEQRBkR+56UEEQBEEA+VVQ+0t6XNK5kmYsl0nSaEkTJU0EBnZQviAIgqDNZBKLT9ItwOwldo0BHgDeAww4Bo9BtXsHxQuCIAhyQK6DxUoaCowzs+I4VEEQBEEPJ3cmvq4VIROjgCezkiUIgiDIjjx68Z0gaRncxPcysFe24gRBEARZkGsTXxAEQdB7yZ2JLwiCIAggFFQQBEGQU0JBBUEQBLkkFFQQBEGQS0JBBUEQBLkkFFQQBEGQS0JBBUEQBLkkFFQQBEGQS0JBBUEQBLkkFFQQBEGQS0JBBUEQBLkkFFQQBEGQS/IYzRwASS8DnwJfAV+a2bBsJQqCIAg6SW4VVGJNM3svayGCIAiCzhMmviAIgiCX5FlBGXCzpEmSRpfKIGm0pInpFyvvBkEQ9CByu2ChpDnN7A1JswITgAPM7K4K+SfGOFUQBEHPIbc9KDN7I/19B7gGWDFbiYIgCIJOkksFJWlaSdN1/Q+sB4QJLwiCoBeRVy++2YBrJIHLeImZjc9WpCAIgqCT5FJBmdmLwNJZyxEEQWeRtCPwiZldl7UsQfbkUkEFQdD7kLQWcCLwpaTFgN9aXr24go6QyzGoIAjyjaQRkr7bwvLmAy4BtgeGA9sBZ0sa0KpzBN2PUFBBENSMpKkk/R44C7hF0vAWlDk1cDVwgpndZmaTgdWAWYG/hZLqvYSCCoKgJJJmTMqja3t+4G5gPmApYFfgOklrNHEOAWcC/wRO6Uo3s8+AUfgwxBaNlh90b0JBBUEwBUlxPA98JOkDSU8ADwFjgVFm9qGZ/Q3YFrhC0voNnGMQcC6u7PYoHm8ys6+A3wH7NXc1QXclFFQQBKWYFQ83NhBYFNgRWNnMTilUJGZ2G7AZcImkuWstXNIw4JF0jtXM7F9lsl4PzCtp2cYuI+jOhIIKgjYhaWNJP5Q0JGtZGmBh4Dlz3jWzx8zs+VIZzew+fAxpu1oKlnQQcCPwczPbPZnzSmJmX+ImwFz2oiSdJekHWcvRUwkFBUhaNpk0gqAlpOfpRGB94HlJ10jaMGOx6mEh3MRXK5cAO1TLJGlp4DBgBTO7rMay/wxsIWmmOuRpO5I2x8fHfhzfj/bQ6xWUpBmASeS0hRZ0W5YD+gIbAfMC44AzJG2QqVS1szDwXB357wSGSFqySr6fASea2Su1FpzicY4DdqtDnrYiaWbgNGBT/D6vnK1EPZNer6CA7wFPAUdKWiprYYIeww54iC4zs0/M7BzgQOAESX0zlq0W6upBmdn/cAeK7cvlScprddxkVy+nA/tKyss36/fAZWZ2D97D2zNjeXokebnZWbIGcDnwI+BSSdNkLE/QzUkKaFvc7FXIX4FPgJ1LHPMDSZt2QLxaWYj6elAAFwPbVzB3jQFOqeAQUYkHgQ+BzHugkjYBVsKvB+ACYDNJ02cnVc8kFJQrqDvN7C+4V9EpVfIHQTW+D7xpZs8UJibvt58AxxQ2hCTtDBwJnC7p8KzHM9L5F6a+MSiAx4DPgVVKlLkYsC7eE6qbVHenA/s2cnyrSEMCfwR2N7PPk2zvALdQo5NIUDu9WkGlJT2WxFtn4A//2pJiYmAPRc72ko5o42l2YMreEwBmdj9wP3BQkmcD4Ld4z2A4sDVwbsbRE2YB/mNmH9ZzUFIiF1PaWeKnwO/N7NMm5LoCWE3SLE2U0Sw7AHeb2d1F6WHmawO9WkHhLb1JZvYFgJl9greCzpAUCyR2YyRtKelASbMXpM0FXAscARzeteZYiWMb7sFIGojPC7q0QrYjgB9JGgFcCGxuZk+lED+rAzMCd0s6L3n/3S7p+BrPL0mDG5U/0UjvqYuxwFaS+hfItBAwEji1GaGSO/oNwJbNlNMkW1L63k4AZpa0XIfl6dH0CgUlaYCkBUvsWgP3PvoaM3sY+AFwbTJL9EokzZk+oN2OdN/OBFYAnpE0QdIvgb/jZtzl8ZA9m5Q4djrguaTMGmFD4O9m9nq5DGk+0cX4JNQ9zezegn2fAZvjHmJ3A38BTgL2klTL6gM74tEfHpR0tKTh5RwLJM0kafUSuxoZf+qS/yU8bNH6koamnurf8N7Tx42UWURXQNmOI2k2YBngpuJ9yUnkXGCPTsvVozGzHvEDJlbYNxp4HZiqKP0eYJ0yx+wKvAzMnfW1ZVCXfXHF/X5xneX9h8duexDYN21PA2yFK6ylC/LtBFxX5lkxPPROI+e/GvhBDfkGA6vXUe4TwLAqefrgHqkjgDWBE3BlcWqZ/JcDL5VI/xVwZBP3YF/gA+Bd4AzcU7ZPi+7vAOA9YL4Mnq19gIsr7J87Xfc0nZatp/4yF6BlF1JZQV0LfAzsVpA2DfAZMG2F4w7Fl5ofkvX1dbgujwDuAO7C465lLlMdsv8UN7dU/CAmBfExMGNBmvAe1kXA1Q2ce4ZU5gxtuK5TgUOq5BmFx8tTkUwvAxsV5d0SeAZ4B5i3aN+lwA5NyDo1sDYwoE33+E/AYRk8W7cBm1XJMx7YttOy5eWX3qEZgUUKn8OGy8v6glpYMSUVFDBV+mhsnVqXfVL6WsD9N
VT20cCbwC5FL/7UeGv7J624EXn54Waxd4B5gN2Ba7KWqQ7Zl06t9nlrzH91UaNlBeAlYDbgo3o/sMBewJVturYtgHEV9gt4uFSDAl+64k1gtrQ9JG2vAlwJ7FiUfxKwYtb3s8K1rgE81uFzzpqeiamr5Nup0n3qTj88eEHJBiowFLgObwzeijdon8CnUXySnq+78YghjcuQdSW0sDLLKah1gXvTCzwJ2DilHwX8psayhwETcbPX99Kxb+Oz2yfiXlgqOqZ/Uor9s66bCte1dKqfqdL2IHzsYau0PTi9lDNnJN/UuNNAVUWBm34eA3ato/xtgPEF2+cAh6f/HwDWqlPeh4EN2lQXs6R70a/M/nUoaICV2P8r3MFA+DjOySn9AOCsgnzCG3QzZflsVqmLPsBkYMkOnnMvYGwN+Qal+zRL1vXUgmt+ArinzL7f4A4+66Vnb830PZkhPUN98bH81/Hx1poajVOcpwGhd8664srIVU5B/Q4Yk/7fBncRBbgdGFFH+X3xFsUr+GJti6f0mYDH8cCXXXkXxMdB3sXdT3PZwwLuA/6Bt3jG4eaJc4vyjCWN52Qg3xGpDj/CXYx3pox9H580eUM9dQ1Mmz7Gs6QX60O+6WX8Ag/JU2tZywCvAn3bWB9PUKZFipufyr6beIPpoVSPz3XVY/qoPFuQb1bg/ayfzRrq4rfArzp4vltwb8ta8l4C7Jd1HTV5vXOk9+FdYP6ifX2A14Dv1lDOINwKNZkCc3rNclQoeIkSvyWBe7OuvDLyllNQ/wSWTf/3A17EJ1J+Bgxu0blnT+c5CO/iv4uHtZkO77X9vBXnaXF9zYM7QQxISnYb4NfAoKJ8I6hiCm2TfANxM8F3cZPbbnij4qpiJYSbG94vfpFqPM+leOt4fzx0TVf6MOCpOso5FTiqzXVyKvCTEukr4eNMFXvruPv4+xQ4Z+ANrw+B2dP2KsCDWT+fNdTFsuldbnvjj296rxXNewX5R2bxzrT4mndK79ofSQ38gn1rUqeJFfdKPa9uOSoU+AnuNnle0W9yhypoA+BZfD7G4TXkn0JBpRfyDb49drRferDLOlU0KO+8eO/qKb7tLTY7Pq6xa9YPXZG8B9bywOBK/S1gkQ7LtydwQ1HaVMCjwOii9GuBnzV4ns2S4nsSWLMgvQ9uxq2q9HBT5Pu02bMMH4e6oUT6X4H9ayxjChMh7u7eZdbdBbiok/e6wboQ7uSxcgfONZqCxksN+fvj47gLZl1PTVzzhcDeqcHydNE39ByqOOyUKG9Q+u6OrOu4CgU+SImxh1IvSBsqpy/wArAA34wtLFHlmFIK6kDgz0Vp0+Buqie1Qe6ZgIEl0hdLH7v1sn7wCmS6p9aHBTgZOKZgexje82jIrlzD+frgjZM1SuxbHO+hLpG2N8J7rw25w+M9tY/T+Yp7ZudTg3kTjy4wvpHz1ynrLEnWfgVpq+HmloZdm4FDgNPS/8fQ5p5gC+vjp8DZbT6HcCeALes87lTgF1nXUZ1TcBYAACAASURBVBPX/CY+VKGkWJZL+6bGXennbKDctdKzOn3Nx1QobDBttKdXuZCVgZsKto8AjqhyTCkFdRMl7MZ4iPzFO3xN38cHDMu6tVc4djNg7RbKMld6yGryUsPHV17C3UdPw3tU5+O9jna4VG+GN5BKmm/wyZCPJ3leBNZt8nx/oKhXltK3pgaPLLwHVtcHrAlZvx6Hwhtyj9KkWzOwIvB4+n8sTbiYd/KHm34/pIGxjRrLnxafcvBovQ0APGzVFI2e7vDDzeovFmz/itSgT+/EzU2U/UeKOg2VfmUjSZgvEfAVfB0ippPMhWvaLiantG8habSkiZIm4q6zhfumxbuntxQfZ2bXmtnTrRW5MmZ2B95rOaiBw48CrpHUqmCUmwPXm9l/asz/GD5m9wJuvlgCHxO6DbiqlXHjUpihw4ATLD3RJTgHf/kfwRsmE5o87YFmdlaJ9AnA6pKmriDvwnh9XNekDLVyB97YATc9fQzUuvBfOf4ODE0LAjYT5qijmNnb+Mq8u7a67HRfHwC+AlaxFBi2Dh7EGxDDWi1bB1gXf/a7uBjYLkXp3wlX2o1yGLCOpHVryl2j1psMLN9BDb4VBVo2VUrJ2fAFeSYWbW8C3Jp1a6RIpoVw82LNLqi4Yn4f78VMpgXeQfgE3I3rPGZVYHhRWl98/OMCWtRSxN34n6NK7x3vPY2jzZE+Ul2VdR0HjgN+28FnaAvcW3FmfJxj6RaVezNuWfiIHLuYl5B7lfS8tCpSRT/cW/QdfAym4eca9147nxJm/zz/8NBUWxalTcLjlH5EkSNVA+WPwnulVe9ZrbH4DgNuljRFkEZJm0i6t8QxzTAZ9zLrYm7c2aEeRuKtq9xgHoNtLL6qaK1sAEwws0fx8YaDJP1a0uKSBtUrg6Q58C78zfUcZ2b3mtkDRWlf4XHRFgd+0+xicqn3dATu3v1VFXk+NLONzAOstpMb8fh6U5CudxfcmahT3IUr8WOBy83ssRaVeyeu/P5nZh+0qMxOcD/eu6+tRV4GSVNJGo33zPfAI2+caemL2iBn4c5Tb0q6QNIGNcZTLJatr6RJkq6WtENa8qMlSFqwMDiypKnwxuhtRVkvxs1z48zjRTbDX4H/UkvQ3zq03sq4khiDm3l2wz3WnqPF82T4xh18fr5xkqg4KY+CHhQ+sPcqsFjWrZEScs6K96IWqDH/lcAuBduz4e6fz+Br73xAGuCusbz9gL+04Zruw81cDY1JpXt2Ct6yqsmdt0P3a2ng+TL7lgH+mYFMT6RnqGU9Hbzx8zndwMW8hOx7UCKuYh3HL4A3im8EVmuDfHPiDluPUcUSVOb4tdN7sQvusfoJbgJvVq4ZgP8DjitIW7PUM5Cu4X+0aCI6sH76hpWceP51vjoLXRJvrXyMR1DYmhZ1rUucayTunfUCRX74ZfIXKqi+eKs3lwOUwM+pbVZ6fwomj5bYL3xC3fvA0BrPfTuwaRuuaQDubPAcsFSdx/bFx5Xuo00D3k1cl/CG2UIl9v2knsZBC2U6iBbHe8O9Gb+gQjDUvP74xjN3aIPH7w+c0wE5Z8K944bXedzZFLh142b/D5v99uKhzG7BOxo/SmnHUmYCdGrEtMRxLr1Xd1HQ+C6Zr8bCZsNDW3yAe8bdiIcPyk04D1o8r6nNsk6bPnr7U2GeDR7mZ1IN5Z1S2AqqkG8B3IbcNps4bvJ7FzcLT1dD/gF4VO0JNGnbbuM1nUuJeUakcZus5Wvhdd5BN3ExLyH7ydQYuqzEsRfjK+R2Qs7t8Z5UTSHQ0vvxPlMG9H2OJkM9ped3K3w45RV87G0iJaZ3tKkuVsO9g8t6E9c6ZvACMB/u6rw+3jt5CHhQ0hI1lhEkzOxfeOSGVYH7Jb0s6cwS3mIj8AHLapwJ7J7sx98i2a9HSLoa93o7ztICje3AzC7BH7xlgRck/ayUzVxSP0nb4J5SA3CnjWZt2+1iPD4W+DXpXq2Mf9R7Csfh5uPuyBn4OzCwgWOH489hJxiLT9M4uMb86wJPm9mrRen34Q4iDZHWtloRn9f6
[... base64-encoded PNG image data of the notebook's time series plot output omitted ...]\n",
      "text/plain": [
       "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" } ], "source": [ - "np.random.seed(1)\n", - "data = np.random.randn(100, 3)\n", - "for t in range(1, 100):\n", - " data[t, 0] += 0.7*data[t-1, 0] \n", - " data[t, 1] += 0.6*data[t-1, 1] + 0.6*data[t-1,0]\n", - " data[t, 2] += 0.5*data[t-1, 2] + 0.6*data[t-1,1]\n", - "# Randomly mark 10% of values as missing values in variable 2\n", - "data[np.random.permutation(100)[:10], 2] = 999.\n", - "\n", - "# Initialize dataframe object, specify time axis and variable names\n", - "var_names = [r'$X^0$', r'$X^1$', r'$X^2$', r'$X^3$']\n", - "dataframe = pp.DataFrame(data, \n", - " datatime = np.arange(len(data)), \n", - " var_names=var_names,\n", - " missing_flag=999.)\n", - "\n", - "tp.plot_timeseries(dataframe)\n", "pcmci_parcorr = PCMCI(dataframe=dataframe, cond_ind_test=ParCorr(verbosity=3), verbosity=4)\n", "results = pcmci_parcorr.run_pcmci(tau_max=2, pc_alpha=0.2)\n", "pcmci_parcorr.print_significant_links(\n", @@ -669,7 +678,7 @@ "\n", "Different from missing values, masking can be used to include or exclude samples depending on the situation. It is applied by means of the optional parameters ``mask`` and ``mask_type``:\n", "- ``mask`` is an optional argument that can be passed when initializing a ``DataFrame`` object. It is a numpy array of the same shape as ``data`` and should contain the values ``0`` (or ``False``) or ``1`` (or ``True``). In this way each entry of ``data`` is associated with a ``0``or a ``1``, where ``0`` means that the entry is *not* supposed to be masked and ``1`` means it is supposed to be masked.\n", - "- ``mask_type`` is an optional argument that can be passed when initializing a conditional independence test object, for example ``ParCorr``. It is a string that can contain any combination of the characters ``x``, ``y`` and ``z``. If ``mask_type`` is left unspecified although a mask is passed to ``mask``, ``mask_type`` defaults to ``y``. ``mask_type`` determines for which type of variables, as determined by their role in a conditional independence test, the mask is supposed to be active.\n", + "- ``mask_type`` is an optional argument that can be passed when initializing a conditional independence test object, for example ``ParCorr``. It is a string that can contain any combination of the characters ``x``, ``y`` and ``z``. If ``mask_type`` is left unspecified as ``None``, then the ``mask`` is not used. 
``mask_type`` determines for which type of variables, as determined by their role in a conditional independence test, the mask is supposed to be active.\n", "\n", "The details are as follows.\n", "\n", @@ -722,7 +731,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -765,7 +774,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -783,7 +792,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -807,7 +816,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -831,7 +840,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "metadata": {}, "outputs": [ { diff --git a/tutorials/tigramite_tutorial_prediction.ipynb b/tutorials/tigramite_tutorial_prediction.ipynb index f23a2ebb..80c9fe3a 100644 --- a/tutorials/tigramite_tutorial_prediction.ipynb +++ b/tutorials/tigramite_tutorial_prediction.ipynb @@ -309,7 +309,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, From bccfbfd04cddcb56f1248cc32097c5628bfff851 Mon Sep 17 00:00:00 2001 From: jakobrunge Date: Fri, 10 Sep 2021 16:16:34 +0200 Subject: [PATCH 02/49] updated readme --- README.md | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 19799026..356c28e2 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Tigramite is a causal time series analysis python package. It allows to - PCMCI: J. Runge, P. Nowack, M. Kretschmer, S. Flaxman, D. Sejdinovic, Detecting and quantifying causal associations in large nonlinear time series datasets. Sci. Adv. 5, eaau4996 (2019). https://advances.sciencemag.org/content/5/11/eaau4996 - PCMCI+: J. Runge (2020): Discovering contemporaneous and lagged causal relations in autocorrelated nonlinear time series datasets. Proceedings of the 36th Conference on Uncertainty in Artificial Intelligence, UAI 2020,Toronto, Canada, 2019, AUAI Press, 2020. http://auai.org/uai2020/proceedings/579_main_paper.pdf -- Gerhardus, A. & Runge, J. High-recall causal discovery for autocorrelated time series with latent confounders Advances in Neural Information Processing Systems, 2020, 33. https://proceedings.neurips.cc/paper/2020/hash/94e70705efae423efda1088614128d0b-Abstract.html +- LPCMCI: Gerhardus, A. & Runge, J. High-recall causal discovery for autocorrelated time series with latent confounders Advances in Neural Information Processing Systems, 2020, 33. https://proceedings.neurips.cc/paper/2020/hash/94e70705efae423efda1088614128d0b-Abstract.html - Generally: J. Runge (2018): Causal Network Reconstruction from Time Series: From Theoretical Assumptions to Practical Estimation. Chaos: An Interdisciplinary Journal of Nonlinear Science 28 (7): 075310. https://aip.scitation.org/doi/10.1063/1.5025050 - Nature Communications Perspective paper: https://www.nature.com/articles/s41467-019-10105-3 - Mediation class: J. Runge et al. (2015): Identifying causal gateways and mediators in complex spatio-temporal systems. Nature Communications, 6, 8502. http://doi.org/10.1038/ncomms9502 @@ -59,16 +59,7 @@ Tigramite is a causal time series analysis python package. 
It allows to ## Required python packages -- numpy>=1.17.0 -- scipy>=1.3.0 -- scikit-learn>=0.21 (optional, necessary for GPDC test) -- matplotlib>=3.4.0 (optional, only for plotting) -- networkx>=2.4 (optional, only for plotting and mediation) -- cython>=0.29.12 (optional, necessary for CMIknn) -- mpi4py>=3.0.3 (optional, necessary for using the parallelized script) -- dcor>=0.5.3 (optional, necessary for GPDC) -- gpytorch>=1.4 (optional, necessary for GPDCtorch implementation) - +see environment_py3.yml and setup.py ## Installation @@ -76,9 +67,7 @@ python setup.py install This will install tigramite in your path. -To use just the ParCorr and CMIsymb independence tests, only numpy and scipy are required. For other independence tests more packages are required: - -- CMIknn: cython can optionally be used for compilation, otherwise the provided ``*.c'' file is used +To use just the ParCorr, CMIknn, and CMIsymb independence tests, only numpy/numba and scipy are required. For other independence tests more packages are required: - GPDC: scikit-learn is required for Gaussian Process regression and dcor for distance correlation @@ -94,7 +83,7 @@ You commit to cite above papers in your reports or publications. ## License -Copyright (C) 2014-2020 Jakob Runge +Copyright (C) 2014-2022 Jakob Runge See license.txt for full text. From 428204f76a78dd57116c0113b948d7488295a940 Mon Sep 17 00:00:00 2001 From: jakobrunge Date: Fri, 10 Sep 2021 16:18:03 +0200 Subject: [PATCH 03/49] updated readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 356c28e2..658946d0 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # TIGRAMITE – Causal discovery for time series datasets -Version 4.2 +Version 4.3 (Python Package) From 99c12e4c24fab31177b13574e1e00cbe9c89cfd8 Mon Sep 17 00:00:00 2001 From: jakobrunge Date: Fri, 10 Sep 2021 16:19:40 +0200 Subject: [PATCH 04/49] updated readme --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 388bed2f..08654b28 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,6 @@ language: python python: # We don't actually use the Travis Python since we are using Conda, but this # keeps it organized. 
- - "3.6" - "3.7" install: - sudo apt-get update From 279e08b3289fb8c112fd386ccf05376c6bc70051 Mon Sep 17 00:00:00 2001 From: jakobrunge Date: Fri, 10 Sep 2021 17:02:27 +0200 Subject: [PATCH 05/49] updated docs --- README.md | 3 +- ...62c26909b3e07ee8f5a6285b2563d69bc979ff.png | Bin 243 -> 243 bytes ...7c1e16a3a8a849bb8ffdcdbf86f65fd1f30438.png | Bin 229 -> 225 bytes ...535500db0213985b2cce27db85468b60985da0.png | Bin 295 -> 295 bytes ...f247b2a9ccd480e67621eb0d8a2d7a68a285bd.png | Bin 1778 -> 1784 bytes ...fc11b715eef698662fe1cc017d7ae2d53320d8.png | Bin 337 -> 337 bytes ...a2245485b3bea72829d6ce2616269810aabb84.png | Bin 532 -> 537 bytes ...6f7e256cbddeb81eee42e1efc348f3cb4ab5f8.png | Bin 239 -> 241 bytes ...b6fe683041026ceda3edd2502ddad36ce781d8.png | Bin 270 -> 269 bytes ...8a7c48fc45fc824d5f07fdd1436d87c9315666.png | Bin 1185 -> 1187 bytes ...4b0728f25dd84a28e483181038a307ea6e483e.png | Bin 242 -> 239 bytes ...927294dd9fee97ab16ad5032baf0c08cdda4c2.png | Bin 720 -> 711 bytes ...b684bc8c0d47bdaf76d0ce45ae7d6da80e5857.png | Bin 312 -> 314 bytes ...ad79f4db8277d12b1a6f5def2d30122c89b9b0.png | Bin 260 -> 259 bytes ...9a2ee48b33448e80b97af9df9550828bdbfb59.png | Bin 243 -> 247 bytes ...3d970dfd19d2f6bfd5f3d3876a74d8816cbf70.png | Bin 1976 -> 1975 bytes ...8ff70420eb65f959d5e2a9b244fc565d9ecbd6.png | Bin 445 -> 445 bytes ...f6b7e1c217e91ec77d6ca54757d39eb1214518.png | Bin 532 -> 537 bytes ...21e0b0899a0d2879d3b8019087fa630bab4ea2.png | Bin 219 -> 215 bytes ...e62736d8aa90101801d7a1416e97e921d1620f.png | Bin 233 -> 232 bytes ...852cea72eb5c4d0a45c0db08b49476f4404426.png | Bin 1380 -> 1383 bytes ...840e3c16ba83566dc457d7fdf79faab34797f6.png | Bin 256 -> 256 bytes ...20e563212e11bf72de255ab82c2a3b97c1a7f5.png | Bin 234 -> 235 bytes ...3d0c9264473a58cc3a769f99a662631131377c.png | Bin 261 -> 260 bytes ...7bb470119808e2db2879fc2b2526f467b7a40b.png | Bin 254 -> 254 bytes ...ce831f5eba0d0e0fcb0cced170b2926804500a.png | Bin 517 -> 521 bytes ...7d234f4cec6974ce218fc2e975a486a7972dfd.png | Bin 228 -> 228 bytes ...756d8d049e32afc3747b6113db8709327060e0.png | Bin 959 -> 955 bytes ...efcfdee16eaaec575238e2e6df5f731c8609bf.png | Bin 1175 -> 1176 bytes ...7f8f771e36601a75e3520845155d09080f6281.png | Bin 499 -> 501 bytes ...3635874ee5c9865debbf9c8686e9367f2850e5.png | Bin 547 -> 541 bytes ...1718d730cd7b17532f760861e9659b68a65156.png | Bin 246 -> 246 bytes ...b2457ac9d8995a4f34d57cadac7ecbbe58f3bd.png | Bin 250 -> 250 bytes ...b2d04d69b82c2288f5ef46664d548355e130af.png | Bin 252 -> 252 bytes ...d56a5dd20011b190ba97d6f36d154e11c9035c.png | Bin 2028 -> 2016 bytes ...43100cd0a02e1e5184694228a7ac41e19e8af2.png | Bin 435 -> 433 bytes ...0deb143e5ac127f00bd248ee8001ecae572adc.png | Bin 200 -> 196 bytes ...3ea88dda29ab6daa19c839ff37b8115e0a10e1.png | Bin 440 -> 441 bytes ...86de19dbb61c9a63584f6e4ddd6e92ef03d665.png | Bin 229 -> 224 bytes ...1313ea5f5a28395ab1dcd07e1e29eeefa1bb75.png | Bin 973 -> 962 bytes ...5f28dbcbec776fbe031f8ac923f83f9260bd8a.png | Bin 496 -> 494 bytes docs/_modules/abc.html | 8 +- docs/_modules/index.html | 8 +- docs/_modules/tigramite/data_processing.html | 64 +++++--- .../tigramite/independence_tests/cmiknn.html | 106 ++++++++----- .../tigramite/independence_tests/cmisymb.html | 8 +- .../tigramite/independence_tests/gpdc.html | 16 +- .../independence_tests/gpdc_torch.html | 23 +-- .../independence_tests_base.html | 35 ++--- .../oracle_conditional_independence.html | 8 +- .../tigramite/independence_tests/parcorr.html | 8 +- docs/_modules/tigramite/models.html | 62 +++++--- 
docs/_modules/tigramite/pcmci.html | 11 +- docs/_modules/tigramite/plotting.html | 18 +-- docs/_sources/index.rst.txt | 62 +++----- docs/conf.py | 6 +- docs/genindex.html | 8 +- docs/index.html | 140 +++++++----------- docs/index.rst | 62 +++----- docs/objects.inv | Bin 1354 -> 1354 bytes docs/py-modindex.html | 8 +- docs/search.html | 8 +- docs/searchindex.js | 2 +- tigramite/data_processing.py | 6 +- .../independence_tests_base.py | 6 +- tigramite/models.py | 6 +- 66 files changed, 341 insertions(+), 351 deletions(-) diff --git a/README.md b/README.md index 658946d0..d306779e 100644 --- a/README.md +++ b/README.md @@ -30,8 +30,7 @@ Tigramite provides several causal discovery methods that can be used under diffe ## General Notes -Tigramite is a causal time series analysis python package. It allows to - efficiently reconstruct causal graphs from high-dimensional time series datasets and model the obtained causal dependencies for causal mediation and prediction analyses. Causal discovery is based on linear as well as non-parametric conditional independence tests applicable to discrete or continuously-valued time series. Also includes functions for high-quality plots of the results. Please cite the following papers depending on which method you use: +Tigramite is a causal time series analysis python package. It allows to efficiently estimate causal graphs from high-dimensional time series datasets (causal discovery) and to use these graphs for robust forecasting and the estimation and prediction of direct, total, and mediated effects. Causal discovery is based on linear as well as non-parametric conditional independence tests applicable to discrete or continuously-valued time series. Also includes functions for high-quality plots of the results. Please cite the following papers depending on which method you use: - PCMCI: J. Runge, P. Nowack, M. Kretschmer, S. Flaxman, D. Sejdinovic, Detecting and quantifying causal associations in large nonlinear time series datasets. Sci. Adv. 5, eaau4996 (2019). https://advances.sciencemag.org/content/5/11/eaau4996 - PCMCI+: J. Runge (2020): Discovering contemporaneous and lagged causal relations in autocorrelated nonlinear time series datasets. Proceedings of the 36th Conference on Uncertainty in Artificial Intelligence, UAI 2020,Toronto, Canada, 2019, AUAI Press, 2020. 
http://auai.org/uai2020/proceedings/579_main_paper.pdf diff --git a/docs/_images/math/0062c26909b3e07ee8f5a6285b2563d69bc979ff.png b/docs/_images/math/0062c26909b3e07ee8f5a6285b2563d69bc979ff.png index a6b38f5b27cdc951b0d05f58cd813a5eb084fa44..863ea67d38cc9a3f6117646a8e5c0e541e274843 100644 GIT binary patch delta 48 zcmey&_?dCSDa~IxElCwDOY0ppuEi&Gy?!&F(}9a2OITq4_K3wh7=Xaj)z4*}Q$iB} D0H6~P delta 48 zcmey&_?dCSDa|NxCqtX2OZEyPuk{U%mVUd;qQJ@!bWlL_kau4m0}yz+`njxgN@xNA D!2A&W diff --git a/docs/_images/math/0b7c1e16a3a8a849bb8ffdcdbf86f65fd1f30438.png b/docs/_images/math/0b7c1e16a3a8a849bb8ffdcdbf86f65fd1f30438.png index 209c6ab3138b448a5c869e10fdbaea7ef9e3384e..fe54e2443da8b474e7b56eb8a5b043315ef6f4e7 100644 GIT binary patch delta 133 zcmV;00DAxB0pS6VBnT={OjJex|NoIOB_99)=2J5H9o z^8p(B+mUo4Pc>iw0#^{hkPaq0!Q@2{!E%9d76SvDf}kh^14p8RH3I{$3P{{nOaVwH nF&tq8DL=@~z;Kvh*9HavGkpqSS!0l=00000NkvXXu0mjf7pE_g delta 135 zcmV;20C@l50p$UZBnT}~OjJex|NoIOB9TA{9>6-Ypa1{>4s=pZQvgWa7-0La&*_Vu zGIaCjW1@1Aa3W7LU;qMF5W$cRCOg68LlD98fN>TB1Dk@NC<6mWwu21=1D_&D+)t_i pNG35{VFW3^$iu*Jnc>s{1^`4&3o$Fy$0Yy&002ovPDHLkV1g}6GUEUM diff --git a/docs/_images/math/12535500db0213985b2cce27db85468b60985da0.png b/docs/_images/math/12535500db0213985b2cce27db85468b60985da0.png index 8ad87f4708e33c1917a4a25caf06062034920059..2d601e90acec1a6ab0e29de603ac6e4a486023d0 100644 GIT binary patch delta 157 zcmV;O0Al~A0;d9ybO;!o&!bV`)b^2lB7YJCAqYbQ!is{{|Nq*mJF}N|#~e7on?L}F z$5O(ZgrJ0XU5mj%`7pO~07Ms6U5lFL0Gf(HS&;j_F6}DQXAqhLb@njhp=|@IkIw*n zHRGz&$oXecx`FPo0Au?H6!?stLg~hye{hdBlrmFv)h3^6$ZTJ>I|F=-D)bHj015yA LNkvXXu0mjfS5Qe# delta 157 zcmV;O0Al~A0;d9ybO@c#7^5mCJNc1(B7Y9UFbG2pfrbLf^8a7EB$ayEs`|hIo(u*c z9!mvJ5`q$*bu0!isxNapUjWfXd1!`e>p-d))CH~X>&mV&eFh;D@Y%zRhuj9_x6c4# zHT&VyDEMblxd4g20Au?HoQN5_bfpWs|G};2P^wJSR8u}~sBB-u0t18vLlw&a015yA LNkvXXu0mjf-9288+Y4fqX^kbhH2L_t(&-tAdiXcSi%{ W_C84O_Ab5TeGP3QVJO#+SIBr z73~F)q_m{?B7=$#A~LPjhhlMQp+cc%y?~?=9ZT&?sj-++ppx^*Z-Uam||PAZhzkQkYF>*X*ZW-fjSg? 
z0}^eYK*DWZ(gpsTbb-SO*Qbppacy|H@x0|1J8}A~Z>}bN+}#&=U*LU#_h6|x$rhOD z+g6?$fra`X)av#6&1n?4$A&#qBT%oG*y~4^uS}o5oP!J3jH&6OUDtUy)9FulKvj($ zo&2HN64e&@5P$T@Wp%F0=)l`7w@Gf*yDw8)d`XtZB_e)H1$hpRdC-o(gLiCW@-On` z$Rw7ha{;9Bx%1S(;oe12w1*d>mX9N{Ln+yzYx`NqRzJ?u_Tg2dQX4p7i(0WO+#RvL zHOO>DLc&Eso@pE@-;v6h04*3am+{&NRN&_TgSO#DGJhhph%!g;cYCvrad<5JMg?|< zaNHvF61!~M<4V$o)8u;*S3*njEMimk63<(}Pfz+i$aqwsRR#Sj`!0h?ePL>jFl2+Q zB3WeXo+@x_2q&z!6}v(iR}zwVC|bY0EYBV%sKQH73BD9cv{$FenF{n^ArC~)T38Kb z2{OlI=zo#rM*DL@73hX=!Va`zS0+U-#+Ni}UsT`$mgmo?giZ!MJ9szFjZ>{@x;OY8 zZ@c3J5>(M4W3ARiAnd#rBT(Rk?QX@cY{hCZzK_KR7rR-Wv$4hKd;T0*_v!6=&gKG_ zkYQbrV}ns)EYYSpz!1zOm)N9p^n#+-M!%mt=S(7KT3;PMdmw67^@>dy zJAczu;Lv0PSu?Fs9olSx8{2pZA8mzRVYfQH_W8hk$uxE*S`rIqlkEO-_p!5}zXB31 z3e>SmR?_D7E(zd4-~xa2$wHHt7nwsyTcxNgRMZLDz8BWg_N`M3Dq64CLR~ahLxBQk zc-Al*K!o0$P0Wq57o4TsmyAo2*IUGOo_{Dw1_c%up#BLG2uQS;dpmwzLL)}#5tG)7 zT%dWAMg;O0gp*kPEz=c-qxl)z;7Su7wd^PN% z{V(7ez7WohS}%9ov%0@s8mnWFzT7Jt2g zBaQG(pDIvc$Mu;Q`eq(n`o6^eX#Kl1P2$io9gs(`0Dwz>vBkTm#ymeyvbFhyDvnizju{ zZ%C-L9TsSuANXS6A-4V*CzOTW3*n68e0a}k#r{UN%6&&fUb648;3e;SBbJ zGaqRn+!%o=bt~dZD0#p9BxpClqOED2Rdy442&bLj-07#Qsn;X)VfM^FppV(q_i>o% zz>K((SZotzJyKR_xj@sCwtrWuzzuPGFoDwLh;b_z<3Us9MQv4>Kts)qGHqzZF84;= ziS^wTSHKO>5D2kH3YnH)MY<&?+K@UPY@HU0h*;XuhxoK|b9y!Tsc2qe45{0gfObpA zlpKI$1KLzcoXIphxVx9Xx4pY}o#KDZi=MPbJpE4BWU>yn`~BcZMs(P%WCZ3dGBn!kH_~L30n};*Z>?WFe0A#h(l#z!(yDyU#U6LoF5;4B3ggOUDU4o*_ykqINX`+Cyjg6dL0vjW;R@4XmI>I+lTL?Pv8 z70M!2b7jEpKpeNuX6lk;R7ptU;c)$SdUsFOOs+T!k0Abx-j4YFV7#5giZz>J6ShQk5SenJsVuW z%kC&Z5|y*bSj#s6gk9Go0C^m@-ObcRGx8C>k4A4Uc6)iw#ulT`ncv8|PcJvp76V*L z6fVI*}GELP*8;J15p=yk%+ho^_2Y7*H zkRbiZ{Da(nwiOuQ!{m}9ms$U!{Y@t2jNJ_v)H+Y zfPeiH^~kDWj%d(g@wlJ14B@RM$xG^Ho12^uo-dik&V@^2;%t(7x7dFCJhVIyf-C}R zSS73Irqmtcfir*s{!j)icSc-f4x?;^MqQ<&)~NXvt<|c5E}E_)Adk~s zbI=<=MBbQ9%n7S!?PbiDj7yT&>%?^)FMo-OfVsKIKaT36Eau*dZ* z1G`v-4N`6=6&D)-ql{~3$n)8TEeOMI05DT*$Ayr^X|S~D1!tS7rwbM4o8@^4z6y5H zo~Myz0c?_FF)>^60+kY^_oYO*RUdR@K+hM`_QFXOUen9IE0aaQek@iByU1~%OFEroj4>Zj`4wOd z2E8YidcFo*X;jL|*e;lbhJdZrAYjsUv~oQzK_=*yiYzawd)8Fv*X$bhO zunsB@{k16&_cdVMC2d(=R^LSusqb8#m*{(%49%S`w3fK5FYDK@8K~#LZ$U|ZCm;HC zAu4GF0riW0U-Vw_)<6A(w9vbOIDLQ(?>WuXUrkk*?^&Lg=zE+ke6}oG4Sxf?9|35} z+lRM=sH6VI%*(sA44;DTEzoPrBA`(Y#I3Q^iV2TvA$dVvx3R?3x2$wJEHBZQQRhOk z4Dh4K!&x$PFbp11>QQ;=Y^K}SbfHrDT-^XCO z4f!Zv)9WwlOjnj=fJS{XFMmb=ZjJ`@qcm~H*bVxa-&7f1TcsdiAUCSCxtY4y8#O!9 zcTZFSHwOSA@&^R9>06$z$cc#oKWzcu5ep~!fSyupEH5W7jy#e@p46CZSAJ4HfTVoc zM1g;kY4qXlUVh$o@7{Hie>Km!!8Jm8^6iBa@%pgcQ}#c}aJv;Rz-~kSvH1aR)8YlV z0b*`9V$Nrb$D6NZ#+`lQ@}#^RS61(99q8NR3djz!$70GyPvCI|#1r_EzP`SF@dcb4 z^DfFi5XBV`PvBk`{Wz{IINdv*8Z*C}vLgHsL{{PISErS100000NkvXXu0mjfdPFQA diff --git a/docs/_images/math/21fc11b715eef698662fe1cc017d7ae2d53320d8.png b/docs/_images/math/21fc11b715eef698662fe1cc017d7ae2d53320d8.png index 9f31e901f9517aa258b0892dbf80d027c55b2400..848c1a669d7594cf223ff2d41ac65d84315b19f5 100644 GIT binary patch delta 183 zcmV;o07(DQ0?`7HZVLOq>7Ct3YfX`Rk$5HuQ8egG18fJ_w-uL$l)R)cjQDgesOg-gUTgdt0OfQesEVDO(J l!N9-*<=#BXfC^ZVIRNhzAwGC`f8788002ovPDHLkV1g+zPZIzD diff --git a/docs/_images/math/23a2245485b3bea72829d6ce2616269810aabb84.png b/docs/_images/math/23a2245485b3bea72829d6ce2616269810aabb84.png index 2fcccecb1887ff35e720b81267419253029e606e..092c5054a5ba9c8373a37e3feeaec615f9bc5f28 100644 GIT binary patch delta 386 zcmV-|0e$|I1epYokO6d&kraOul;#v>;1s~Ec?u9O#A`ep*uhzBQy3nAX!c-s{cSM8 z0}Kq7`wuhJ?=eWOWH1DYG<`m9aUUwn*8n8mFJLfYU@#D8U{3(@c&<7yyk>YgbLKn- z23C+7`w0vafK(I%LjaKC{mH0ZiUr0CEP<-P}Mre^uT615oJ;U{W3^&BqEP_<2Z+ETpx}c4aK)x<5V{>%AO&s} gV+m9{vH&nx0iJhE%j3uA0RR9107*qoM6N<$g4Ma3IRF3v delta 381 
zcmV-@0fPRS1e64jkO6OzkraO`l;#v>;1s~Ec?u9O#A`epn88_WQy3CJG%Kr_xj=4V;HU?B zk0~9<;syrRPN2Ydh%^gW+MQ3mxPg`cg%~G5 z+zfV_B?Ch!)Du8yUVU*9u6Zyw2Lm+&otf_d5m>~)&||YZ(tyD+3@rVefkl4{155l( zh(H?5!ysP*!5Rh>0c9AE2@-Bh2FLNuLIb00000NkvXXu0mjf8|$0r diff --git a/docs/_images/math/276f7e256cbddeb81eee42e1efc348f3cb4ab5f8.png b/docs/_images/math/276f7e256cbddeb81eee42e1efc348f3cb4ab5f8.png index a56fd6a39f49fb6fe5f7a27a883e9075ba2fd38e..9e24ec8dc8d603cac114eca39d9abf512f597fa8 100644 GIT binary patch delta 147 zcmaFQ_>pl!TD?cfeu>|A=O(N_|JkVfr5(eXR0akH&H|6fVg?4jBOuH;Rhv&5DCpzq z;uyklJvkvHEx{R(7`8W^IU1_nL>whIgl+zduQK9i;c0|NlkUkl8iE~iWY015yANkvXXu0mjf4v#dv diff --git a/docs/_images/math/2db6fe683041026ceda3edd2502ddad36ce781d8.png b/docs/_images/math/2db6fe683041026ceda3edd2502ddad36ce781d8.png index 4ba3f6e0dca5a9fca04ac18f0917d7a060796f31..bd561a563b1857162b0a69724dd5b0cec937ae4d 100644 GIT binary patch delta 170 zcmeBU>SdacU2l}|yX5ZY{p(+Tn0KRk`iVpoIS9k!I4v= zc3R`prbEUGi{~~jP7n+bzRliN(-Fxp@#FxH8Y5f$3}x510a7cv7?*J|%@A==VX(d@ VXkYl*#};TQgQu&X%Q~loCIBdnJre)` delta 171 zcmeBW>SLOaU7t{L_w)Yszl~mMxjmL+U|`@Z@Q5sCVBk9f!i-b3`J{n@nVv3=Asp9} z6B-)Wd3H3EFtntZ-)BfPC>C{C`a{paKufBN#o&X1!HlEa8_W${3=C8}j10^SG#HnL z8*j)7sNhbKNq*p&!IJgp?|}mOH4{HFHp|R7sC(c6ugc~Z5>wb#B{i&UV#s3^;AHUo WC^$VtF=ztNRt8U3KbLh*2~7Y1VmqS% diff --git a/docs/_images/math/318a7c48fc45fc824d5f07fdd1436d87c9315666.png b/docs/_images/math/318a7c48fc45fc824d5f07fdd1436d87c9315666.png index f2df398884c289625dfb670d3215d5b9774f9fa1..b4849f04db19e0fbec413fbba59a89b1c546aa92 100644 GIT binary patch delta 1099 zcmV-R1ho6138M*+XMeBz&vczK-AMDlV2kN!8d9eK000SaNLh0L01m_e01m_fl`9S# z000C2NklR1sO#j;~A z>2v4<2w4ym-GAL;Zy!K;;N6(;@ zTq`>ASQ)h`twjxx>t?{S8{`F)IgUJUAvb*RfLD=g8&T*Er<5xDCjrv=?wxopIyp^r zdcdLSn+Lp?0QV3joUtjrcBn;@WOCNvYufqrli`uAmVb>8SB!Gov9UgL8v|!tT@Fa0 z^G7MSixzh!X$8+;>LpQYu%Jfed+T5~wUvXzwKBO)91_iLou^n}6o*XU%OKvjr=8nJ zQhVU3Wh(=3D`;GM3rMD8ZPDf%bVbK1^oi)3d6OE4O;o#Hvwf^)6F0>sx$v)SPm-}g z7w{>)IDb1s_mDfBf_bP0&2!{X%MCX9E!$Ew56uNW1+BRdhZqV(x+U3~qbQLl;EXr? zIoBFN=o{qZen>5TS3|ynHp|Y|euVg(+u84JPlV z93HyGq#vebTK+?6_8XA;M+e^C=U?0MfG(bz=L8%XA3u-Yc8LpeS%~{a59VgWQ3pI4 zUjp5FGj%~E4zqoO2eK&$blA1pvdSo&1AnoukGdd~2SxJ>j$U&?8D^`{M@!Y1s#6y} z#Q)BJE?jx#IMHcgBrFYNs7rio^~jgMpqd=TN+MAS(JhNf8OmH)zu>!KXkl3$93aEj zX$@?-grrYwDI@h1wN5HP{Q9e8Y*)mmSc_}}^^11wCVh1s)1eBD_&zC!>{)$S`?*;N#0*6M4POZcTQ{|}-G{qDWzo_kLUV9O|Yg=HfWGS*>fGz?=BW?N)KBX{oJj&Cf#kS2-c0l2(UyG2`AfeeY7-XJDED{@_E1|TI2^99w}thv*{#bI3pT|ipYY`&UbmOs z%O@_k!!yfP0p2~XFzp>6n2rr4i|?T)JJzC)q;6tOY7VwiZAH`eH%(iaTWpdExAs1W zlCi-cuzx8j^f?1Vx;LFdMQ8xUcjT~xnQYO|_9jL5p@qPvtU=#%kU)t@m!utY=*Im% z?+<^^v`T2awAW(NPFNSkfuBT;|L~^yOOQvg+Xo(I&pI3cnKEMZ2umnTN>qb^@d6Bf z%}J~E@?9dFyYs;p-ccdbPB#z3>W%2xPyzVYAAj|TT28@MkR4*iyS)`7X+OA-s=UVt zS^VgHfR+dvVzQiqtq^l^<)Sx}AZDJZ95bcv`kmt_#&CZZQWm4n*%C@5pUwbK=S zPt-$xhjg^T&nuurn5;ueGme9Yf~`&&z>Rti{2Q=T_G!M#et@;h@&$aDBL#OLu9j1f zRevriR8}&I7Jjb}{DQsLuqv6DrI1ojDW_-^&gp3iuh}(Ran{RMnrGaiM_0yYogFu9 z`9AA#ahWZ(Feh{JAIjB#9xy}c%LUhVv|f#!TjD-OI^VwD4-x8utOmj1LxpNM<$}j^ zH(*FTLR}!`pgOocRZD}|@6>A7dL+06;eVYq)CG|u@Jdi{^qLF&upNaOTB;^wmAddD zF6e;`{gvxPr-c!BQ_NCVc*N>aAb&x%xr&v1z6au4u9GsD_vrL&H{y1r4N7mfG&OMV)|Kve$9-1q$a|O7hH#zGAt!KbrMl8a{b)>zt?wvK40?ENaC)Cu+kxO1_lPs0*}aI1_r((Aj~*bn@<`j=;!I; z7{YNqIU(VSF=qk;(>5jr!Gi}@GN@cLEzmNEV!kpu%-g_bEOh+I(h2H${1kfjTzfJVh#ubH*X31U7|!`3R0|3O|y zJ=q|Ub$52(oG`;&&T9Dk5_f0kH@E+%ATaRzu4Q)Hj8M=Xckg9-=N=cO_x-B>sys-I!lhz!CYlr7$DDCgdr;J&OX$?8q=OVX zg;a15A#01^O@dMA4F%h!xusJF;}CF>1hI4}_$MZ$l|bL!HGjFg_g*eBvtPKE3vJ{o7ilcCJ)$$tzS&&(h+_r|9S?QDgA30CSq2LoFMP!e>*RuOpT$Dk6LzmP zI_u`3v!IT;(6X3x$qC_EVFc0gH;{? 
O0000)30DU(bFa;Ud6p^DLc5uD`L;eN^7J)#hGTvG+ z)h_^&Nnmh*nZt2%14K1YhJm2~Za(WbAR(W?5D%2*m;hIO1f+_;fgu=3u>`=(u7f!6 z0>c|1#Vi03-B$|LQvmjk6x7|vU}nWdB6(gKB?JH=aUGCrf+(H<0000)30DU(bFa;Ud3X!8Bbg;YtL;eN^7J)#hGTvG+ zl`jB72@DP}b2v_JfC%>s0OD7^YAQm=z1)Dd={(2tk=`6J*G6qHZt=3U{){* iu$GW%3gDTSAjA;$Kv+a-@7)UwK;Y@>=d#Wzp$PyZTN+^i delta 79 zcmZo>YGIl%fiZsKL@`FDi7VvnSQ%bc^Y9cpbUl_nK8w|1*6Z237$zhogeNdC|6>)9 jS|Q8B=cvKvZ@|fr{7g7gc%Rk`1|aZs^>bP0l+XkK6)za? diff --git a/docs/_images/math/499a2ee48b33448e80b97af9df9550828bdbfb59.png b/docs/_images/math/499a2ee48b33448e80b97af9df9550828bdbfb59.png index ca4a5abf591694c432b4c0b8954fdb032826a193..256a838f4c04567a714bc17bf8a28e2991251d39 100644 GIT binary patch delta 132 zcmey&_?>Y=n!3mS-$tJk=B__~SBvfcRR#tI&H|6fVg?4jBOuH;Rhv&5C>S=eS0YF- zH6X<+J>W@!K+=np83D%=4s7UXOyr*sb>P4S&MRpL4t$ss%(&!>l5w-;grHbu9yU2)z4*}Q$iB}i=Z;B delta 128 zcmey)_?dA+ntHcRYNerH@elF{r5}E)AZ7!Su diff --git a/docs/_images/math/503d970dfd19d2f6bfd5f3d3876a74d8816cbf70.png b/docs/_images/math/503d970dfd19d2f6bfd5f3d3876a74d8816cbf70.png index 810c3ee84c48d8e6068efac1d043beffddad95e8..1f2f8f446dc27cd1abd675e9c4077b2d1af9fb6e 100644 GIT binary patch delta 1852 zcmV-C2gCTd54R7Hkbe?ML_t(&-rZS!h!kZQfA)KJW^bL&FA~DbI{HxXBqI92R;Zxv z4<|bDqd#2#C_yPV@sw0_A(i|ic8e_2;D)|H(z{7bql-MD=#MTqgCK}D@3#UppUoD|bAR-s0Ih99!g-#4%xG=Y zsfs`Bgw@ip+rS?-jI=cDXMw8cWfb-!*ec-D zw0@K8_g#GlTYq1zR;z@FH%bpbyN2K^WrQ9Oiw+^+Xp^f3I*HwE*r%@0PxXshK194x zdT2`;urJbuhnPVfu}UhBVgG?oZu+~OtU1(R#(0Q$qx8@=P0x1WmR1?iQ^YY>PZkjD zgo&x?4Zxo5=Zv$>CH9s+_V_)_S8$oJ5U-OiGV4^`r++NzZACZ(TGuiCb2zNyE;dj^ zu$O;(-URsQf3?RuE*pgBw)dTe)$ii>LtXkN!hEHYM8``XnYVBxk`pev_>XfGbzZ0p z&y0n5aJo2sNu@nOdt%e4TLF4O?d}Gk=YuJ6eEY-w$w)E-|BMO%)W3j)|TT&!-R%P8VUf)q_p^N!^A03mjlq zszZLA-?r_Nt5metI8%-~!MDh}hhU?5SVFgt8h^qKZ>^bL2h%^De(~UR5jLq502>V} z4A0&<39Lcm7P~So&#$EA*(V;?PKW0zV=pTnNR08J^RGtdLI3w8~#D;+Kc~`tlJ&f)5+qN#XBy& zvV6qBBOgcu@Mer~X_*mb(WMImWEZRc%zBi--hT*jE83Bn)DK1LVVJV_q?H+>MX-yOQBlWI z$rjA6X8krg0XFRTBI0;mTJjomRW2Zsc!aFC#h5V{4@gg;F-%^xN(s_M#;kGPc%%yI zXv|1W=9dF458qJsegZNDQiQl@84WEj;kmFo!?tp?%Ypr}Ne0CtnSfpgw@qepuYci# z4>@Mc#e>qF_PPK{z?e0zG@d{YHIXSq@rz!&5kuFfEhJM&=RSy+6&Il{QYM2S6P~MD z$~E@GP0og$ouI@^dbc6t&ZOrcZA{18rN}Yk%vhsz_`!B8hRwj3b(z7=K6GR<;$pzW zogcAxbsR4%E<#gFsdLR_@W0KWGbI_*yt?Ur^?r?4Y$0K9^_ zn<^`(VRxKX;YVoGA+i0-buKd=Zj^pZTcnxlY1}Vyf3cZ0sWcArAYOK}N{@$uzSAcR5d5#_(Wy~5^+I796ZRwi#Pa9WyuB)w$M=w2?wsNotZUN=2Ko-@Zt2Uls9IcFss z#u-_$@SUS>)Lw4c%N0|LKCllkRsv~c2`0JAJ|L83l3HI-hu4{4>LYg373xdRtCD=C*~^cL4OZxiqa+T`+o%W zqjW!X`|N;oE_d{Stx3)0VK(6*dzLimtidy5E?zes!`4n3?1}kG%KX85CF#N|76=!m zH$G{E;JLJJU^L0b(Q)Q8xxYd@I9+7cYS7PErjKF?8J#+N!a0~R5hmU!y{>ts3$IxC zsH|tE-t2J{=|52`hkuASO0RET>B1`(J}T>(ske(R`Rxc1Z7Ee^tGtF)u>B1`(J}T>(CO40C@hBEP zD(jghH;;7T6^kb->6vCXuXGU=iyQUVGfit==^`o?CmN(@nl;wE(nVA(PBci*G^+{H qMNlj(8l-2M-ORd*#SEioT>B5tzvth8nw01O0000+`?vLCh$|Ry~C4L+<{VV7^jKlJoAzXU;E1L>heYAGX|7_ZW5g#e>tu z-IrF{88j!}8Mq&y2jtm@d&;nb44g7&K7Lc1x{yQ%Jo(%08%gVQ(UmM(Ja?-O(@a-p zl(-B)*nbZL+^2e&QM4`&1bojkV<8@#?japmDG~&GEq)xduju;T;Lw$qJ^7^bTXx># z>MU72cSa6!R&1Q?jexM_5-3-Fc<{)$=oxk$g?Mnf2%A<4f_;^~Ce?xc8|-1IU;dLc-7c&;(!RaDwT8Y7q zxKjSUL=R`Jd*X8LalZMDPv>ViAH+r(9>Q@_9lM50MicH)D$#bMb(51u80CXw?d&%ZyoLUM(^P@@ULRP2`sY zWf)GG?EO2)lxPv^GRr7rcqz|?Z5h_@<9QqQTRPx-THqz1|2o?#Bnvz7me%c^1g?8?}IILL2bkQ;y0-5q$*^sWYA8ra3 z>@0chCARo^%Q$<|tI(-U#pzP!m~m#jSvve=ITpf3V9dIg!OlKdGPsh$r*t=ZA zVZ|b*%OvnQtJV~NUs=D?EZBVpNPjkL*V={-r`wFdVv2SpZYpVS+&?`lk6AzRjuFw! 
zm^Fn>Lef|&F(JKHfmE@I?43Z}n59Li%PiA}_dY{jHkl14%2rFesG6`N9|62ex|{4( zPD9RiT7#dVLxsfLEo&TRJk%`xn6f}o<&)SivAhw_^QW_*8 z4^|m77P^s7`u>&YmV>mHc#a+&Wy~5YeQ>3!Y@SWp;I%hElrH{?#V?wjy?oJ~l)eAH z^t~gr2)b00z=MnD+Ef?pmVaiQc^27X)y;+EK{qpAtcFWZ(2*!G(!o`lrr)_j7u6uA zSbVgif7BXoua_$(mwjM=;o*sUI4)G?;9AU>k2gw3*d5ih&R}zuHWN6SC|!!azZy4= z(%wRgh`X@UaJc!C6vQxR2daGUK-=#c_XE{%>DXN|u*i90uF`h$uz$KJU5dW{BWN6@ zI|(f!+F3`e`M_2@&E+vhY#=*A$1JPy%$SQeOvfdq9M{+r^OdAID6a+S!YdXC7d3Bu zTnoW-&ANfnB%2-m%%^gnLOeKKWY&?OpRsfw#S$_)dG@q@(sd(Dyjglfb4wRqvG7r^ zo|$~Jwxin)Z>;x*h<`UrZvxvTU3kU9N4&C(m2Te|Ry<@2%=_3D|) zw^>gyQ$xg?r8hB`bm0}tjw%=R=$TeGmvrG3i!18UGp%ke>B1`(mk#pinHD#fba5#b zKI+voEp9I9!YdY6)T3uw-Q3beR4h)^U(d9xxuuJ!SnOz!o;GP&b4wRdvDncdJ=3Zt rNEbn|uxOB;X?0T@Di$@2o^k9yL>J~*cAY4H00000NkvXXu0mjfo4S@4 diff --git a/docs/_images/math/5c8ff70420eb65f959d5e2a9b244fc565d9ecbd6.png b/docs/_images/math/5c8ff70420eb65f959d5e2a9b244fc565d9ecbd6.png index b4b1431df2a6bc6d0e7dd771906bb05cc60e5f34..201467ac5d3e8a3fb1970cc00660a80b8d7bbeaf 100644 GIT binary patch delta 316 zcmV-C0mJ^i1HA)~XA!Ua&vczK-AMDlV2kN!8d9f`cqf0^kpU;*{^g|f56oiw&cN7! zTOk8i00WaDn8o;sfeDX7hB5|*`x78cX9fkVN(3R)1qRNqP?7mz0$5efhB1m0U=A_I zZY>9tW?T*T+;{9Mc^sfR9>7G7{sw7;uvsr{Kqw7?xN4UGRGKru8KkiQ>QN^k$|IspJb`$6ZEuJcy_ O00004&q1};1b8QK{b?r(rFof#HjRniEdE--L^g^KJC7r?4=6O7SZ0CR{1 zc56AIG-DFnb3d@FQOHs$ubiR z=7SAmw_tzZfk^iMl!gNXC|%8gHN*_yEXD^Ack&!y$EFY_4FVvl5L^5I|H5jf>IMKuVnf!>$iq(n O0000 diff --git a/docs/_images/math/5df6b7e1c217e91ec77d6ca54757d39eb1214518.png b/docs/_images/math/5df6b7e1c217e91ec77d6ca54757d39eb1214518.png index 2fcccecb1887ff35e720b81267419253029e606e..092c5054a5ba9c8373a37e3feeaec615f9bc5f28 100644 GIT binary patch delta 386 zcmV-|0e$|I1epYokO6d&kraOul;#v>;1s~Ec?u9O#A`ep*uhzBQy3nAX!c-s{cSM8 z0}Kq7`wuhJ?=eWOWH1DYG<`m9aUUwn*8n8mFJLfYU@#D8U{3(@c&<7yyk>YgbLKn- z23C+7`w0vafK(I%LjaKC{mH0ZiUr0CEP<-P}Mre^uT615oJ;U{W3^&BqEP_<2Z+ETpx}c4aK)x<5V{>%AO&s} gV+m9{vH&nx0iJhE%j3uA0RR9107*qoM6N<$g4Ma3IRF3v delta 381 zcmV-@0fPRS1e64jkO6OzkraO`l;#v>;1s~Ec?u9O#A`epn88_WQy3CJG%Kr_xj=4V;HU?B zk0~9<;syrRPN2Ydh%^gW+MQ3mxPg`cg%~G5 z+zfV_B?Ch!)Du8yUVU*9u6Zyw2Lm+&otf_d5m>~)&||YZ(tyD+3@rVefkl4{155l( zh(H?5!ysP*!5Rh>0c9AE2@-Bh2FLNuLIb00000NkvXXu0mjf8|$0r diff --git a/docs/_images/math/6b21e0b0899a0d2879d3b8019087fa630bab4ea2.png b/docs/_images/math/6b21e0b0899a0d2879d3b8019087fa630bab4ea2.png index b63507e2dbb54d52a457f422d625577774c81c8b..cdc6e6b8a4e2e7a55594f94bd41e30a1307af7a5 100644 GIT binary patch delta 121 zcmV-<0EYkD0oMVLBnTu>OjJex|NoIO9~}Sy$0j~y0000BbW%=J05ba+bYQ>Fi}SC3 z5#h3tY$`<%k^m%F1xkP<2g4MQ2=itT$ua>%vVmk7I2}NwwE>8X1na5-Vk1FiAj#e) bz`y_i0aFHA$_UBp00000NkvXXu0mjfgEuQ{ delta 123 zcmV->0EGY70owtPBnT%^OjJex|NoIO9Faf>93quYo&W#<40KXXQvfpi7<7xjV9)cf z-RRf1WsztqMc|qYB-jO7fg}sV6p#q>VGzkO0YtJTfJjaU5NTroA}hhVx`5b7P#H*C dhY2t+000Qt2BMjfmCpbG002ovPDHLkV1lg7EwA=QYBK;sxZUHx3vIVCg!09hF>r2qf` delta 134 zcmaFC_>yr#R=v^OyAs_A>(76F`IT$#bp{3o&H|6fVg?4jBOuH;Rhv&5DCpwp;uykl zJvkvEB#kp6A;G}dpLr4wql~b`nT8UBj>89@GA#3vw6NqiFnE7bYi>bvuvY-R z%o?0VveJR4U4OO7SP->+?EY#6$d^);dbdR;yy<6QKfwO~v|Vj0J-0cklxz8)BebxY zW}D*S6#JJAaZy$(lw-zp>{6sDQ-?KM?%4~AX?)41KPW35CD-H%9+Cy^@^D0{kEs!- zw5gK04F>r*NCie7`(?qq{`xV%mpTai0M zj<0zrEU4@B0{1iC-MS2XsIE9u>I=Eg1bMisj_E;qJ4bJ4fg3#7pAf;Ctn?LIc6kSV zv|Pvjixov_JMFTY*2aPN4U~VyW15u07LWK|6FIiXhOr!c+ictQh z5;ZfXf`9&jrWS07QH-T16~C>+egw`X(#dKs#Ua(~gDK1(>a66Pk^NRQ)^Ux+Qa(4z zM!AL}5R6Ky>Etrm53}zkF<2lNx21pU-lE+#*y~fEWfF1EXOChm?v{VIebVDLN*~;H~u#g7LbnlxqqL`Hi2CQiC#h zD1VG#v7`KVw`G^hExKUvG(rf*n-R~PP8DJd<{?~wV2g*DxGu0ujS3ipF}Z{FZzDJt z*}B}KYX%DhRq658N6B7rYFE?mNDK*e&%82`r^wZFUl}F! 
z!L4o=NxEDfAlLOQEXcncN-iHp7fHH&)PJp!Gvq}U7VJks+Pr4HDw6c>N`97{(=+Vr z_rjE>^Bn<`UcLe)=VJmokA(%R$PWZwu(sxjBs~~se}{BEXSR@UP)OM=*c?92=WEMS zun?3Ou#asewNHXXe%_j3qzuf5z{7FdRcvMfwfA zg;Pw4{O!Auy_gvkOV9Gm{9i(C7L5Ui5w}hh?0))t8 zCxTgpl^EFlFeP)B;+J|A+CMWqEPwchc&4?J1{giQA6d#KreNpkH`J42_uJ(5ipRqO zV|+90`^lP(za*S3CZ6W$H+q#2&BuI;eEkRNBGfiz3Mt#$+%Q$#02`K>}dwpjX| ztyo7AyFi7mfRMcPP?@p-(j0@~$)5YjsgZ4J3ek<6_q zkjrigF!jv;m00|6fofbY5IENRN*cQ$vMHuG^)QQ02S`&Cu((L z%(tiixqoW%E3cA z1%JyOCH}i3BM#T-fP?oyqhP!nip)KUK#YU=hVd!b63lr^L|NOGgYnh<%)Yge(jhgj z(J=>03dRpBMZxW|$*TO%WaN_-08d&YFe$G6+e8h#0@{yXXyPVi0e^uh zFeDO^-wWG5J?j%<=j}J`j#+1%c5n~CV)$i@dTUu{ZY{!?5sm*d!gjZYM2VfZ-@per zC)gu>=yq0p^dr;1i+AS#5Y}eZ7_eFWuHpMsZMENc+NjEJRWCg#xGBHra+{A}Qud=u z>x)EU#4gH|%ol;UBt>58dJiuP&VP`|v>v4;U6&}9tiUFw#LnAq1T)3xd*t+bg_i}! zzI$OmOy1evS7oq8)YH8EMmA7loW3l-%gM69P;ckL)W_Oyt5rcph&0lW#=~igwcojl zy`dbNBi)FmB{r*+lk!wRC$>pXc^#DnZKon$!A;1&AwN2h_frFV00000Ne4wvM6N<$ Ef(qMvn*aa+ diff --git a/docs/_images/math/73840e3c16ba83566dc457d7fdf79faab34797f6.png b/docs/_images/math/73840e3c16ba83566dc457d7fdf79faab34797f6.png index 8daabe53adbe42e1dde947f336b03be9ba619e65..44da6bbe71c33bb2473e0fe13e10cae2767c8dff 100644 GIT binary patch delta 116 zcmV-)0E_>C0)PULZ3{BLi*)l~ul4A^W|4R*UK)QgFtA!MFc>m0aQ$FlXkcLA{vN;p z0`slF6i5A8AQ`~G`@eyKWdQ@j{sj#3K!N@QhWQB$43Z2C`Bnja4;UEOijAH!FaQ85 WW)k;SB#$Ei0000C0)PULZ40lzGK+NcU=8Tc^O1NeUNU|%FtA!NFc>m0aQ|XpSir!*^F4q8 z1m;_VDX#i^Kr(=V@Bab@mJJLH=Ql9O0|nM6Fw9S2V31^B$hQty(ZImKR&3PDzyJU_ WoD$^S9pbPZ}ub<>}%W z!f`!0LBL4RG&vz5A$$eL42~r`C0+?JNrW`6xymC@z~N)G$tF4Cz^+EcNu^S&oe!9> u#fwIkvQ1+aP~F2QFw0of$+(??LEn=5+JQq((}AWkaCo}nOV3Oljd{1|aZs^>bP0l+XkKRlyFK delta 45 zcmZo+YGs-*fiYp?#9&2^?#{-iOjl0rELPxQh}9Ea_v^dmB?chyboFyt=akR{0E-V0 A+W-In diff --git a/docs/_images/math/7a7bb470119808e2db2879fc2b2526f467b7a40b.png b/docs/_images/math/7a7bb470119808e2db2879fc2b2526f467b7a40b.png index d9714cab50d88946fa082b427b12ea03f18bf546..210ad4d30737a01a86ed31e6992f13aecd898b6f 100644 GIT binary patch delta 123 zcmV->0EGYk0saAyXA!Ua&vczK-AMDlV2kN!8d9f`cqd=kk%57M?K=Zw1CVB5{KUY- z00zzs3J}KpFaZdI%N#;8aDRs~j{b(woB_@dMlHkq3t(du82BE57$po0ygz~TjL#Do d=KV@w005&15@Z4zf%O0Y002ovPDHLkV1mZYGpGOn delta 123 zcmV->0EGYk0saAyXA!^q&vf&hGTpC8V2kNxzy`6Acqd;?k%57M{Rac%1R%}8_=|yy z0SufO7C;#L!v!D=HVX*N!1M#kIQj`fa|bv>7`+VpFMy3%z`*wa#Asn);QbAxcYfZ$ du=dN(#k7B`M;}X5lu%4$SPlowqZyvkN5R z)L;-9NR77Jb4%=HOgwCp+NEl3Yp=<0*GTmHfMrwz_&*HajDJK9l~EC}f7LOQys#+% z*_hUz&`exsa|ev@YEi>aM}tXJLl&EP`zGQwP1O(L2ki<8%9DEE6?tbI1Ltd^jCu|KNr1~D zo+WU2OsK}{NqZTc`k^B3P~z4mHI)XsZgf-9RF{-mu* zbWld7@7IUw71dP2ceZLbE&t0{*Z38>z?nwO>p7IvrCpgtfjWs)nd^XTR&){VNCUc$ gfqzeL{1p`V1JgWLCRq`!)c^nh07*qoM6N<$g5UVId;kCd delta 382 zcmV-^0fGLB1cd~UkbhuFL_t(2&vjBgNJBvk{2+{UcjWOTU- zr%BlYz*+P@gt$lY!p4eSh~p5lTW~lV5tB7+$}^5gnru8cDj}lS)yVQetkrf2-Vq1Fgpb!OxB6in9JqZS^ zC#hBhyFExFA{4~dAMk7s1-;Z2tQQZBUh1vu4;BR5YOST(T7SQp$tJsXvb5qHGD+Uc zdo$mB-%Nmww!WD68aCffZ;ZBg_f$Ms_rBNt7g2fB?N?qrdeeN0|IpsVSXg^DuVuB> zhMO`lqQ|Y_-Rt8>t>Y?ri6|#2yT04Wg?9hG+$4Y%!(mxqU0YlDL~lu>v9%q!{kA;@ z<#E7$+q!m$e18%$Q9(*ALFt29s`j`W4v51@yX0jaY4$J9m5?eDaNfl>>Sn?Wc=3Ey z?fMOzlZtYZ@(p#ALVmT^%Wyy(Mm@{!3%F8*wv03bnls2ymxXwQnpIbTK@TOVs33K) zZyI*QcnKIo%&QZVHp~?<#zdBlL8m#_IqW&+?iVD)mNgFVx;=DMFv!ZIJ zNpl|kJpC8Xuwc;wnL!Fz^FR)oD-{)_h69qEh`czQpAlZ;)cGuK)E<&(D(#9mvSv{9 zZm?)eMLEe>s_Dhy>yGexz-M@m1JB4t>t=WJ!KgB9nhlVPa?-@FnqDDHUdoDc$_diT z%$0K;`+r2&Tx=QxE<-NLNV?O(5+}|pg2^kn_>op#=U%rHEGld*5tLD`>nL4^As1z& zUX0N9nj7O4!Q_?TyS>6oC7=hg_6MN(Si?f))MSs{$skE%|Vn8-dxf zP2lx|U+WN50kQFg|MS-;Aw0w%9X9)B{tqiInJPkXrYg-x*!Jp{1wVnu69 zMTs=D;9>Y@o@Ezn$M_y;6bhdU1s1PLn7p0{RyXv)gV5(5@$!P@;})qJ;y6t?Qc)|~ 
zLk5s)X=%x`(8$_=@R|uHTh2{()Mzvlhi{pDuWFNuYIb#{Q9B{`%2u8B(RcM_YbWAf zm0F5M<5Nn(6y*miha)dFtgM|DT}G)GD#(|lqN5RE%&Yke!{Ca)n2B&x>7F|9R6l?-5qCU8v=s@(+Cd{*metoG&}_bQxHZ{+n<}K?I1yy z7PCuGZ4}fZ;hYs>D>`^kw@9MX&Lyx2G=h>KwM?;GvEO@dW`AdPeY0rvJItG%?|a|( z-tYb1dkbtfW%6-s4=9o4BwNv7l0q`|FEe>1!*nz2hiSmrSz_ zN~_C#-?4rq4SzPKjiQ1STY-{AysFN|ycYo@ks&Yrc#Cs$J`1Bj!1WAvQZoyj6U^5y z*W%}Y10SWLoTU6gBbAU->Gh^OAP%Dr3Qz7^BRq2SpQlNsO^R$;P13oEr;#N%Q&-pnq71I#N+Vn&>tu$x~ro9L81V z1x%CX0v_@5U%JAQMGK_63uY_;DX1B#s30{QlGKFn#o_d<@M@|EZb61PKvD5^y2ViOL+AH*F5b5WTU=gZwv0IY{YCdKrYHjlRs;Em0|KyO3EqA zEiXOg)qgq;il(K|)CSyvT$GVCtBVy*n3oTeSG4qiR$k*?FX~w;*;>NgMyatI*oTwu^k8b1J+7Fn@U!dCKsKrbS2HbRN&XvnggVKmglb zY}1-jQ6h~l<}to(v;D*3OpJrM+?QOA1v`W%M9=rSybfI_4Oq000000NkvXXu0mjf{codZ diff --git a/docs/_images/math/90efcfdee16eaaec575238e2e6df5f731c8609bf.png b/docs/_images/math/90efcfdee16eaaec575238e2e6df5f731c8609bf.png index df6552c7107ea3cb12f5eb88aeac807f5c7fbbce..a63e800321305c08261fdb10918af0c867438972 100644 GIT binary patch delta 1088 zcmV-G1i$;23783xXMeBz&vczK-AMDlV2kN!8d9eK000SaNLh0L01m_e01m_fl`9S# z000B?NklLKb&Qw1r`_KRz5jXind9)J9c5OGo`Y6Q%QNub8THUE`!? z51h6hSh;aC+Uq3i0dD`U^uac=4KtljU&Pf6d>HB`yJHHL*qU0Kc|!+fQeT*Qlf-9L z>2{rxMiz6SF?wvy?2E!!%wD;SqpDJRf?|IT1bJsTzM0v(Mom*zMDRGPBuwnLSJIG1 zm~pY`jDISKk~Y30l`dRWtdvp!(bcFm-g?b)!Jwjwfna9J_<*Gn0}!=wI)N!qgXyy$ zkj>NHpH1-;C~@_EpKP#6N4oZt5;xfh}RU{(tBx=5`yE++qxGjHew`Alng!E2b|@xc8;4 zw|-1nFZU8K%zE&UDT8)|p{{3Xb@LRJr=mEin^_N`1caa+VN}S_3pTA3QtPJffm=%+ z3B`smFeqggCTOJ*Qa3e*A=6K7;4N|a6d_G^0QR*NV#(2m}82+4|)HzJC z9`X=|Ua*g6{?~3;$mAg(VPp!nr}{PBJ~9i+nOz*#J^TexS~6B~$??Ge0000Nkl8|YY{CTKytA-H;bQPAoH zFEAIz@J-uC#ao@ASdbNO;EN54_~65;s8eyXh@c4GrW^Az@PDLj(wvhfU8MNpK9=U} zoRk0g&X?qTzZ}5EI)`v7fTCCf7b=bru9P|H&CUBRxrgWGZJQl+G;rAnxJdq{iE|t& zaX;a)XnOj^Su~nqF$uT)&Y4nTgOtBvGuAs(nowt|65D4!iQfp~^;}0vt`V&?-%qTC z6lNrn?uhfJ=70X68h5Hnc?QwBk`QPnzvVJjxfrEU$4bYMNCG8vWg|)y&Y=(&#l#eZ zrAcF)_%8^vZWgpIa;JU)2kCp4!T{@LA-nx8jyuCqoTh3$Nwj3#bZ$PjqLFcP%+X3Q z4vWp>A$t#-Bx=pg~;P!3I6xNWVpJ{yXQc)xDp|6RYI%G_SCujYl=|`@$SXs6x3(00H4Rx2$Gx1AFtpEJNkbZ8 z)We1|EPo?G>iG6_xUgNeQc45FwPjl6p64tVBxLmu1T)i)S6M1C0MVn+0Zc3PX0BdD z4sYw6T!^Pcj7khZEF{NCdUkQNX0B`_$IVh-uE)BRiZ_1$C%->x0*E)Jd{?1htx8q_ zbdYEzh0$COK1D^G28bcx+D9r*+snf?oB>?2<%@ zs{_9hcpcY!&`K0QaF+(-C}%R#hQ|u7%joNth4T#%KJQX{9Ers)GfHNFD9j~V4<=3~ zfS4r~Qqp?x!OVG*1wgnq7v$P(kGr+d7M^ z+YknNrQF5@tu#RLrp7R2_`&Yg!#0G0>3f%eBl7E|TK6nmg@&j>109ykC z!z~8xuvWGUy#5>5PlKhlW(4G3Z)0Fy4KxdAmpTK}1O^5v239@?km&+J5Ca0907LpO z0~3(R2~@S3fd?oZ$-uz10Vu+E02b6hLm196m_pt4l_BB+m=zxYil~3E82bt&c5q*S zsABuea2Uwq3V&eO1(sa`#Jr_I*Mq{6;~_}s4$unkO(6d>ZD3FWi7@X65{%720g%n? 
z4?tqZP}cypLjz$i7f6;HYAvS#h&&1m3}~Pqg@n=t?g>b$_+bWQKn1RV2*yeV<}W}` zK+I?0^#D1B1t_o&79$&wVh)+jV2I4Y8Qnk$0H|O>q7Xo)IvoH2002ovPDHLkV1joy BoUQ-> delta 405 zcmV;G0c!sB1M>rrXn%CS7+}wf``z>DGOs&*xOo5o010qNS#tmY4#WTe4#WYKD-Ig~ z00B-(L_t(2&tqVK0@elw8~|B@1(za*5F~aRPF2iEh8@7E3Q3B&0k^7Wa2D4E+^SMQ zgpf%90|O@mLtgHS1cVgZvwJY7a)Y=990g#O?LNK&BsbrF@P8c2LU0MuG8yFa57#R2*fRda7K~NK(07LpO0~5$b zpsK?RTtMk)1_mAjpa|ar1*j^ZAq?vnS{cA@X86hwc>&Cd4**5fKUj=?1rnCb7a*$G z{xYlpvUma*Y=6MAM}U~O6zF|2KrJ+C+_k9VJc?;nh`G165GJ@5pOsvIPWCYt$ znc9iBNC>J?nQ`JR5`s=t8fLsjLK5#vEv!ozKgaJ#s+cc?bw$a`3y_ga*I8V@Qs3pDMKdXI%=PdS z(6)ReSr!%=xS_}ub^b4H0-8lG*#Hmx{sJxvj*q0bgyq#8&&JY;HQr}f7V-#oQs2_} zemtxHt2Qvf+eHg;*&QY=CDdZ@!?SoT?qBsV8(b>~$S`&-b)(jDgIDxtwkppW6dAl2 hb$_k}{JZF1!EfXSTJx_SEqMR{002ovPDHLkV1n{$%o6|r delta 445 zcmV;u0Yd(r1fv9yaDOs%^S?JRG9mx~010qNS#tmY4#WTe4#WYKD-Ig~00DkUL_t(2 z&tqWJH()>pLZ2DX0lE?fS5z)b0EQNHB@F4PT($yCEoe#@I#IbC6EL-)DPj1C%H|6$1m$0q%d-1&`c1D!5vCNOk_xS3ZW|v7y#3r V3|jE-zHk5l002ovPDHLkV1h6oF0%jt diff --git a/docs/_images/math/bcb2457ac9d8995a4f34d57cadac7ecbbe58f3bd.png b/docs/_images/math/bcb2457ac9d8995a4f34d57cadac7ecbbe58f3bd.png index b5c473eb4afd7f8377ab0ed25c0f9eab894ed251..7980acbd0d49351ff11249d482f867c19c36f9c6 100644 GIT binary patch delta 86 zcmV-c0IC1_0r~-uY7Xgi`xr8vi%8vIb1wWok$6Qt;SB&&LJc62tCE2YNWFi+z{SA8 s`I~{^6az!M0|SpH1H*k0;dliG00Y?&3Qv@Xy8r+H07*qoM6N<$f@@JCmH+?% delta 86 zcmeyx_=|Bu7T?{H-x5Z1yFFeetd!C1o><{(o4A)@S&Ni|O1LVs{Di9i4UOH6ji1e# q4{Nfum9ewQ`LeZD2tNGH#ls*GDbO`taqCJ3An96~*r_o-Kcqm+Ug^GZQAO;sheL?^OLj#bx zngI$9K`2I+Zww096}wJRc~Lcqm-u zn*j<=Kqy9*Zwvtpu>}lKtjkyvI5I9U__heOGq5o?K-KX$KpA{6os}?#5|mar0%c5X aU;qFwhYt<`L^dLe~#{ zs1>|{vT7f^ekdU+2XU2L(V=W=Q;O(?>XoG?f~5Mai{NljDH>qZ5lo{7^Vef)EyJP z?+JRiy-hCrKFx(_K^gT%vMhHQx7uP-sk|DZ1!dG79UylJlWlRCoD9>FtvHdVYPsxc zi_4CDm==^#cRX9mpY2boK)kKzla=(}MZc6t2V7J$ySxI7uEoosG^Y<2_|sAz2$QWcFf zq^4juc4-3IVAHvFxZD@2?Q3>_n}E+#lk@|L$OQ6<*X4WYFWL^5>eVnUD5I`O=rF4E z;B8E&4bgJaz%H{z^rbE9GCm!q1!dF~ow;le-hIkrxql?x)N+XecDWjojkr{#R2Xxo zj!diw-3z=X$?&4LW??T%xsD=y3tG?7+{bV*hc}$DB61nu_6d5vy#JpJ-g8-p3_8xc zo{L}mj7+sbvw&;b zfj*OIT72<lqY=oE<$!D7S zm18}ero@}DG841XMf7OJO>`I)Z7zrC)VS>Sh)g^0II)GK;WgpGf1Mo4=Mi=t0v;`? z=F3Q;$DifVsDbX0W*4q16;P+5dQnhfMbbov(a_c0MP`D2%71g& zTQXE)3AlI{#zQccKkHa?b@upt7OOQ0mn0S%1%~`Yt*3l+9%owWO zkt2+OpZ|3CxsanSmDd4YLK~542k@K3jBewM#rIQ} ziOctXjkik8s)0VqzJ3#T=g^WvA1CAO)jzL<)LyV@)KK)9i)i7U*UQ>q7pOO`JulF` z9O`9i_OX?HvkCG3x?<-CaDU{;6luDR3?tH#F8q$fV3r#@ERQYR?rT|1nh^(^Mh&6x z9p>y+rl0N^s0We#<*NPo6k8ecRf`gr2ZbKrm<(I2x}V@QSq5=QXw(o2r{<48AkaMv^-!Nm3m?}dv)Gir z6x2dV)N)bM-DEXsh8M*eg?)6-LVca;E%B=lPxma;vnr)=zuM}EOoeGd89~9*Jqz`$ zN@?7$wmK4%VOpX=^nd2*o`w2%ik?bnoKydgNaw?}pp3xa=$?i8;WDFQ8t2siBgV>M z+E}Ah%F#Uw^_+@nTvS^eYeKa(jh5!L_t(&-qo2~Y!p=($G_QIXJ^{o2@oG3h7~17Ay8s`QGxYM z>_gWJa`Ay7QAs4qhERysz(l1<4P>hkCGfxmFE3m=KodzCu|5$(jg`d2#6&hmMM4ZM zH(@2kbD7yax1CwrWrJS=%bx#tzBB)sIp6$d0l4R5j_^G}KYzRyEuZL((!w$tj=b$Ys|zR)iLo(P)@uwaa8{E~YlID?$s)XgE4R?GnaX;<9ZlN^5JzfxKML<)W6j z?8rrFVHpj_>-Ajz))JR<3sG8FMkBH6?cqEO0A#it9U>e2QXXE!E)#on7ve2(xz3H! 
z!ZI3(=l+2%=zj#*sc($q9;A7DVFPx#>ZNS}*_OEMj?}uRw}74*fQ1Iy+nzpQ;(?4l z4tudnS3?apZMDMX(nxLTbo<)~yrPfMe~^fcAfI?&eu4hPTj5f>5~YP@G!&@3F{afws6B4bdbyB!tLn!^5uUr_~5ky_M@%1Z6&Wh zjfafxkI^ySDB~T_{nWh0ww9OY#mA-7F^ks}8MUzK;<)hcN}WS{XZTTo9x&z~?Qx(V zohNg_Vt@R+OlpszmOCuv_XUOYO|CeULJOY4Tf71qT^rib@tGdk=Z^zCV#xr4&MN~p zrv$X5nj%AVNeXNATyDgF9S**cy}6}E1?#1J)RXLxVMM6t9o*97f3@T$G-??ph{$)fL0{+ z(8ejD^0Bg1az|~aRj=V9PE%!wE=l1!E_HIp8@XC<7)f|lmkC8r_PD!%8`^>XO>X1T zs_GMHu@wu5(fAZ!T=~Rpa%$CG0WGPf$`Dj+f|KpAg@t@tm|hkqc(RqA1`m+hJWax!HZ+{t7Y1D>f&xRr`T!GDy@M5|4;&h&_8)1|Z zZ^~;QnUi+VqZJR)Wel{(kI-A=T6IQjGI+;HEFl9QN)P_q z{t13z>N4}hk?HXFpo@DlB3`3{41dBh*y<0FWzso%bXpENs&`Db;JY03Ko4AU`!YR2 zKPTV?GMzOvK;JC~c3(vosvlj(8Oxuc zE>qVY`VO~B)2o9%ZD)_Wcshrc9Qp(q2iIO$4f=lYY1D?o%w@Fjt{Y{ucRm<*ZoMhd zy&CFOYG(fGuGxfme_i?hF@HFIe4GqfWEtA}N0<@?>31XsvqbbEY*C%QUV%P*MiP7) zwWY%MxwF@}BHc4kuXtHftN!Ct9%abaa1<^-mu7rV4=C#}(*v}mnks|1Bs6MEg?rJ_ z7eu-QCljkFTk&p0^PGv4~?m`@K>r| zNtI73y&g&;>55{y#cIloS~ztI3v|yyeS>Nf!<>xL!ZJdCPvYpFg?d({G%l*mj>LGB z7M2m}lB0VT>RFZ2xPPcN{Xa>@qO^`FqBl?XEYyGI)Ko&_oO(ARn~T!IGD3r+dlu^Z z%Z!R?oKx>c43?v`!KqRyNB1n$b1J5BS#5Hxi`3RlwKPZfEYx#v2bjiXwaGCWsf|vt zGDr6u)Uzs~aY=1)bVg`l8EWCd)4d4wH5JpiUGH6@ zDe+{g9HE6}M09VC?nS5%R7&G^y?2SG#JdZkw6Kh*cYc=cMW_!{O5=9DcZqvF1iBZX rzN%6hx9h!2{9nQfg?g(IYHzyr7uOh)A|0<51Mk!9KUTLYE+IKTm9?_Z*%{{Tp{eP>{7fXdH> zP%Ix97W2oYKTL4ql2NgQ{ z8$%V-9|ou@?u8H$&H!gjRSRIMk_#BXuCE2U0$ml~2B<0?prHw%;8I}Vdw{NrKLDz# z{sIFhNL2{~EKJ;xRGk-qsA4o?V3-B;$c)bu80P&-0QYHzyp{NOh)A|0<7N@k!3mbjetsi9^eGB^;1@@p8%xUe=smkfXeTM zP%Ix97!cG7SHzZZ(1t6*ztr!@#0X?$w^9F`}-@w7jG;`*&nKMC=C6%uYQN{Ub nk>584{D6~z7(jnj+%N_JXrVx&FohfD00000NkvXXu0mjfOk9bd diff --git a/docs/_images/math/df0deb143e5ac127f00bd248ee8001ecae572adc.png b/docs/_images/math/df0deb143e5ac127f00bd248ee8001ecae572adc.png index 65c29ae2426c3b143c82d70125bf8804f7f38bdf..31d96d61b4ae97050b997ab7dd121c78bdea0ff9 100644 GIT binary patch delta 102 zcmV-s0Ga>D0mK22BnTu>OjJex|NoIO9~}Sy$0j~y0000BbW%=J0ATYnNOZp#`>%^4 zW&0D6Y$`IAFb0NF4hIGSYanMD5VtTeuqp^Ja86jjz%sP}07V!Z delta 104 zcmV-u0GI#70muQ6BnT%^OjJex|NoIO9Faf>93quYo&W#<40KXXQvhJ|GK;?$`$(^J zofVxgtC46bGDcShh7|S&1_2u&XDSd^Ffec_2rzI?*ucOsB>@0Sk_A3DzteaC0000< KMNUMnLSTY-DVH1Gxi`aDQOw-AJ_H1hxPG010qNS#tmY4#WTe4#WYKD-Ig~009(9L_t(2 z&tqUzb_sP!bzndOEb%FB_ZYAP<^qN=2F3;yvC9k$lLRnSa=2Y&V3@_g`H_Kr1B%2i z1_nk24CU?&^A|8kGBEyR;4VNG=ljmU!2SkI*`bRAOD>g)H&g%s010qNS#tmY4#WTe4#WYKD-Ig~009$8L_t(2 z&tqUzb_sPebzndOEb%FBuNkld<^qN=2F3;yvBwMylLRnSa}2r`szKoMsFJL59CopwO%%D}LIGX_L8yjKoRBf(H=OjZZ<);kV00000NkvXXu0mjf+@6PP diff --git a/docs/_images/math/e486de19dbb61c9a63584f6e4ddd6e92ef03d665.png b/docs/_images/math/e486de19dbb61c9a63584f6e4ddd6e92ef03d665.png index d3be284e9f26a051652c6e3cd6a4d09e06d58505..f6cece395f56bb714d6572aa4f551e0364aeb049 100644 GIT binary patch delta 163 zcmaFL_<(VO3WrvJPl)UP|Nkc%s@5|wJPY>hVqjq4EeY}qX7E`5@~%;dMECiG-=D9f zRD1wRa29w(7BevL9RXp+soH$fKtX3u7sn8e>&XcTA^R4cVqnP$cT{n{*vq7#IjL-` zaYN%rrR$~Z*7BTTi@7dYX7s_2A;`=?E~L*6-Ypa1{>4s=pZQvgW6^WCp9bQp`z zVEgHG)Kof=a2Q!cNkl&pfpDN_DyRssX#L+%&c#~64o zaKB(U#lW`!NL_~UpmGdSV3k09&l?b?fXxH|oSGUz=<$iD00000NkvXXu0mjf5kNbN diff --git a/docs/_images/math/eb1313ea5f5a28395ab1dcd07e1e29eeefa1bb75.png b/docs/_images/math/eb1313ea5f5a28395ab1dcd07e1e29eeefa1bb75.png index 7eca7948e6f26c37aafa63d9b78064b851730308..a82ddba3894a7633e017c166f41d0c1caf6eac2c 100644 GIT binary patch delta 872 zcmV-u1DE{G2f_!CXMeBz&vczK-AMDlV2kN!8d9eK000SaNLh0L01m_e01m_fl`9S# z0009WNklF|90&04tj_)$SBI#J{bAck@FoO#Nl~Qx6eM(zh9VCYwS<%) zl@ir@a_A&PZgq~#gHRpxV$pF6B0NZ29fZh2x7f@>y?O8L%zvAC<5ScJ>&`wu%(cKdtzDM*XHx#PYw~d&wDa4*{S@iAveQPD3A#;0TYwf+`{xuXr 
zv%SMiE9}iUztKMJ&@WT3bZ5F$L277ERva6bE8X;4|E!O7@V>PoXzE0fU=LV$bN?OlLy=c76hMd%k7yxH0?E!r23@xwX+$L-p0QAkd#%({(g^wbIjqR3tp10}|uvV%|8 zdnfr+N&7hN4!9;sNKR~wzh2u!Ad2oiQyXoWPdH0gdu#jBf#?eP@ z^=up{fPZM9dNLNzu<`f=X!y;ytPoC{VG->_egj2vUQHb+fM}pA}^gtD^RJwR5JRc|`1 z6DWvipkjx4HH!q5-oJqAuFdD#DkJxEr0BI8zd`B#lgg1J2k_GgYoqTKPVKw z6$McGY{+?)g_z#+ zJz=E^)FubAf{QGM0@yHR0o{@uuWb19P%HB-?xpIjs9ah<(18s7YOq8+A0c2klH--< yg%eLZ%D4CcN=5z8IL*HEBF;yNr0jTQ()S-f(+3%h|F}B<0000FfhIs3)iPZk*ZJJ#7oV^jd8=s2a zr>P-~a@|Z$qr4N&AJk`@`0ZHhZOML}5Dr;~^WL`@_ug4MNAK2GYqFR7%sPyVS8$Mf zMIy)vkr^JX!hgu1d_z(3Mz9%Vn64mbrucf58Xs29A06c2=Q<3h9Tv}nNCY_{GQ*7x z!~t8~Kkvm9Q3y?=z96FqI8CkCts;gnQYcKZZOCCP)pw;x_}f*D%*+9hg@~86f*6Db z_=1vEfa*hJ19&R_M+T!Cc!?mPaAIVJmr|tsBggcDD1U?*L6G&gwriSKFjwTM81>-< zS++TaP&hF%!y`Rjk$uz)q7a4!LFlTLX(>H)mYWR%98D8TArwxG%y6?BzO^Jl6iP3N zg6zqg%=q#kHQ2Ar81)LSSs}EM8UAi{2ZAW{+!X{lX#B@&=mK$VGVGBL$L76EI6*?; zq{Pvsc7F%97bHPc&;e45*H}HC0UH1CBP#$$oFEa-iTFd5$h?MLkOWadcggjb7OG7W zpQwU{ZVAAwRz!l07KBFja3hbVO0Dd-5+p-ZP!|z%n>Eerq#E*g7B*vpLTDp%Tp8>W70@%QP3R3@<7K(Al@Fa6Iy&uZ?vWT#o9_ z*kOprXs^XrWL@gf9+pRJ|PctJQ z;B4xVZQ&iRw!W8ig1S{fWx4laT!PII0+y?AJhJejWmNcc9lE!oN?8%)){NsxdTVq= zcp-#Z>v3f{m4*a;(ID#s%e-+zxV7Z7@>My3D&002ov JPDHLkV1hOhoO=KO diff --git a/docs/_images/math/f75f28dbcbec776fbe031f8ac923f83f9260bd8a.png b/docs/_images/math/f75f28dbcbec776fbe031f8ac923f83f9260bd8a.png index f3e04a412c749730a91ede6ba627cc7a0664b192..7af2da278e81fb2c9ddd6a87e624b2f9188df829 100644 GIT binary patch delta 400 zcmV;B0dM~B1MUNmXn!(`NEl$h-TR%-bn^`(uDbvL010qNS#tmY4#WTe4#WYKD-Ig~ z00Bu!L_t(2&wWunO9Md=eaYTOE*J68KM;)ADCBBUJn;wQN-czqorol*jU-qIRto8~ z@EWZGrm_*yO4}<8*hL!)ZBi%!A?VEBT@oRG=!;U0 zS!oq0bZL^F@qZjbE+X~~a@1a&%!`rbl6zkpu+!1^q2le`O&vNYJV&Fw8Y$cNPO{}e*h u6PD$0IP!A)3E9%fX7rUsLNaZY`~px;M63mIb$I{)00{s|MNUMnLSTYl6|~p@ delta 402 zcmV;D0d4;71MmZoXn#nHG8kaL-TR%-bn_xL(tiK|010qNS#tmY4#WTe4#WYKD-Ig~ z00B!$L_t(2&wWw9OT$nQ{@VO#Q(FxB4~SS7hj!FKh=a2oMHHhr=pgOrA{93$JJ>}8 z3qn`p;v%Axu7NtZ)h;4V1%-eh`rS*?I1~@wec!#icX#g&7=I=c5sr;74)sHG_saNB zwi3Ef)&=y_pRXC>0kD1ffbN@H*tJbsna8TLR%$@6(R)|H4q#?|JOL|Mufh62THaS} z*o1UN%ns@V))soMO$7MNoZ7Q!K5d1$Gw-YcSScwTsVz*WeA zWAS)Jsy;gj%ztMeh8jqwD@jg#sy1r`XAUbU&wRcW|Mr#rG=!Lv&2Y3&f-4sc8TAn{ zYygvlW@Oh@2&Vbuo?j@3K?XA&g?VqGonJsJ^)Ndn(`4;p=vf_1dc>o$+<~1?OSZL~ w?8LBRmP5Fcf$Ya*lWWueZrO}Uwxf(+$go7toYOk|01E&B07*qoM6N<$f>@Qd9{>OV diff --git a/docs/_modules/abc.html b/docs/_modules/abc.html index 3822e8cf..17dfcda0 100644 --- a/docs/_modules/abc.html +++ b/docs/_modules/abc.html @@ -5,13 +5,13 @@ - abc — Tigramite 4.2 documentation + abc — Tigramite 4.3 documentation + + + + + + + + + + +
+
+
+
+ +

Source code for abc

+# Copyright 2007 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Abstract Base Classes (ABCs) according to PEP 3119."""
+
+from _weakrefset import WeakSet
+
+
+def abstractmethod(funcobj):
+    """A decorator indicating abstract methods.
+
+    Requires that the metaclass is ABCMeta or derived from it.  A
+    class that has a metaclass derived from ABCMeta cannot be
+    instantiated unless all of its abstract methods are overridden.
+    The abstract methods can be called using any of the normal
+    'super' call mechanisms.
+
+    Usage:
+
+        class C(metaclass=ABCMeta):
+            @abstractmethod
+            def my_abstract_method(self, ...):
+                ...
+    """
+    funcobj.__isabstractmethod__ = True
+    return funcobj
+
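+# Illustrative sketch (not part of the original module source): assuming the
+# hypothetical classes C and D below, the decorator's effect can be checked
+# interactively; C cannot be instantiated, while a subclass that overrides the
+# abstract method can:
+#
+#     >>> class C(metaclass=ABCMeta):
+#     ...     @abstractmethod
+#     ...     def my_abstract_method(self):
+#     ...         ...
+#     >>> C()
+#     Traceback (most recent call last):
+#       ...
+#     TypeError: Can't instantiate abstract class C ...
+#     >>> class D(C):
+#     ...     def my_abstract_method(self):
+#     ...         return 42
+#     >>> D().my_abstract_method()
+#     42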
+
+class abstractclassmethod(classmethod):
+    """
+    A decorator indicating abstract classmethods.
+
+    Similar to abstractmethod.
+
+    Usage:
+
+        class C(metaclass=ABCMeta):
+            @abstractclassmethod
+            def my_abstract_classmethod(cls, ...):
+                ...
+
+    'abstractclassmethod' is deprecated. Use 'classmethod' with
+    'abstractmethod' instead.
+    """
+
+    __isabstractmethod__ = True
+
+    def __init__(self, callable):
+        callable.__isabstractmethod__ = True
+        super().__init__(callable)
+
+
+class abstractstaticmethod(staticmethod):
+    """
+    A decorator indicating abstract staticmethods.
+
+    Similar to abstractmethod.
+
+    Usage:
+
+        class C(metaclass=ABCMeta):
+            @abstractstaticmethod
+            def my_abstract_staticmethod(...):
+                ...
+
+    'abstractstaticmethod' is deprecated. Use 'staticmethod' with
+    'abstractmethod' instead.
+    """
+
+    __isabstractmethod__ = True
+
+    def __init__(self, callable):
+        callable.__isabstractmethod__ = True
+        super().__init__(callable)
+
+
+class abstractproperty(property):
+    """
+    A decorator indicating abstract properties.
+
+    Requires that the metaclass is ABCMeta or derived from it.  A
+    class that has a metaclass derived from ABCMeta cannot be
+    instantiated unless all of its abstract properties are overridden.
+    The abstract properties can be called using any of the normal
+    'super' call mechanisms.
+
+    Usage:
+
+        class C(metaclass=ABCMeta):
+            @abstractproperty
+            def my_abstract_property(self):
+                ...
+
+    This defines a read-only property; you can also define a read-write
+    abstract property using the 'long' form of property declaration:
+
+        class C(metaclass=ABCMeta):
+            def getx(self): ...
+            def setx(self, value): ...
+            x = abstractproperty(getx, setx)
+
+    'abstractproperty' is deprecated. Use 'property' with 'abstractmethod'
+    instead.
+    """
+
+    __isabstractmethod__ = True
+
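+# Illustrative sketch (not part of the original module source): the replacement
+# recommended above stacks 'property' on top of 'abstractmethod':
+#
+#     class C(metaclass=ABCMeta):
+#         @property
+#         @abstractmethod
+#         def my_abstract_property(self):
+#             ...
+#
+# A read-write abstract property is obtained by also declaring a setter
+# (via @my_abstract_property.setter) and marking it abstract in the same way.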
+
+class ABCMeta(type):
+
+    """Metaclass for defining Abstract Base Classes (ABCs).
+
+    Use this metaclass to create an ABC.  An ABC can be subclassed
+    directly, and then acts as a mix-in class.  You can also register
+    unrelated concrete classes (even built-in classes) and unrelated
+    ABCs as 'virtual subclasses' -- these and their descendants will
+    be considered subclasses of the registering ABC by the built-in
+    issubclass() function, but the registering ABC won't show up in
+    their MRO (Method Resolution Order) nor will method
+    implementations defined by the registering ABC be callable (not
+    even via super()).
+
+    """
+
+    # A global counter that is incremented each time a class is
+    # registered as a virtual subclass of anything.  It forces the
+    # negative cache to be cleared before its next use.
+    # Note: this counter is private. Use `abc.get_cache_token()` for
+    #       external code.
+    _abc_invalidation_counter = 0
+
+    def __new__(mcls, name, bases, namespace, **kwargs):
+        cls = super().__new__(mcls, name, bases, namespace, **kwargs)
+        # Compute set of abstract method names
+        abstracts = {name
+                     for name, value in namespace.items()
+                     if getattr(value, "__isabstractmethod__", False)}
+        for base in bases:
+            for name in getattr(base, "__abstractmethods__", set()):
+                value = getattr(cls, name, None)
+                if getattr(value, "__isabstractmethod__", False):
+                    abstracts.add(name)
+        cls.__abstractmethods__ = frozenset(abstracts)
+        # Set up inheritance registry
+        cls._abc_registry = WeakSet()
+        cls._abc_cache = WeakSet()
+        cls._abc_negative_cache = WeakSet()
+        cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter
+        return cls
+
+    def register(cls, subclass):
+        """Register a virtual subclass of an ABC.
+
+        Returns the subclass, to allow usage as a class decorator.
+        """
+        if not isinstance(subclass, type):
+            raise TypeError("Can only register classes")
+        if issubclass(subclass, cls):
+            return subclass  # Already a subclass
+        # Subtle: test for cycles *after* testing for "already a subclass";
+        # this means we allow X.register(X) and interpret it as a no-op.
+        if issubclass(cls, subclass):
+            # This would create a cycle, which is bad for the algorithm below
+            raise RuntimeError("Refusing to create an inheritance cycle")
+        cls._abc_registry.add(subclass)
+        ABCMeta._abc_invalidation_counter += 1  # Invalidate negative cache
+        return subclass
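+    # Usage sketch: because register() returns the subclass, it can also be
+    # applied as a class decorator, e.g.
+    #
+    #     class MyABC(metaclass=ABCMeta):
+    #         pass
+    #
+    #     @MyABC.register
+    #     class Concrete:
+    #         pass
+    #
+    #     # issubclass(Concrete, MyABC) is now True, although MyABC does not
+    #     # appear in Concrete.__mro__.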
+
+    def _dump_registry(cls, file=None):
+        """Debug helper to print the ABC registry."""
+        print("Class: %s.%s" % (cls.__module__, cls.__qualname__), file=file)
+        print("Inv.counter: %s" % ABCMeta._abc_invalidation_counter, file=file)
+        for name in sorted(cls.__dict__):
+            if name.startswith("_abc_"):
+                value = getattr(cls, name)
+                if isinstance(value, WeakSet):
+                    value = set(value)
+                print("%s: %r" % (name, value), file=file)
+
+    def __instancecheck__(cls, instance):
+        """Override for isinstance(instance, cls)."""
+        # Inline the cache checking
+        subclass = instance.__class__
+        if subclass in cls._abc_cache:
+            return True
+        subtype = type(instance)
+        if subtype is subclass:
+            if (cls._abc_negative_cache_version ==
+                ABCMeta._abc_invalidation_counter and
+                subclass in cls._abc_negative_cache):
+                return False
+            # Fall back to the subclass check.
+            return cls.__subclasscheck__(subclass)
+        return any(cls.__subclasscheck__(c) for c in {subclass, subtype})
+
+    def __subclasscheck__(cls, subclass):
+        """Override for issubclass(subclass, cls)."""
+        # Check cache
+        if subclass in cls._abc_cache:
+            return True
+        # Check negative cache; may have to invalidate
+        if cls._abc_negative_cache_version < ABCMeta._abc_invalidation_counter:
+            # Invalidate the negative cache
+            cls._abc_negative_cache = WeakSet()
+            cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter
+        elif subclass in cls._abc_negative_cache:
+            return False
+        # Check the subclass hook
+        ok = cls.__subclasshook__(subclass)
+        if ok is not NotImplemented:
+            assert isinstance(ok, bool)
+            if ok:
+                cls._abc_cache.add(subclass)
+            else:
+                cls._abc_negative_cache.add(subclass)
+            return ok
+        # Check if it's a direct subclass
+        if cls in getattr(subclass, '__mro__', ()):
+            cls._abc_cache.add(subclass)
+            return True
+        # Check if it's a subclass of a registered class (recursive)
+        for rcls in cls._abc_registry:
+            if issubclass(subclass, rcls):
+                cls._abc_cache.add(subclass)
+                return True
+        # Check if it's a subclass of a subclass (recursive)
+        for scls in cls.__subclasses__():
+            if issubclass(subclass, scls):
+                cls._abc_cache.add(subclass)
+                return True
+        # No dice; update negative cache
+        cls._abc_negative_cache.add(subclass)
+        return False
+
+
+class ABC(metaclass=ABCMeta):
+    """Helper class that provides a standard way to create an ABC using
+    inheritance.
+    """
+    pass
+
+
+def get_cache_token():
+    """Returns the current ABC cache token.
+
+    The token is an opaque object (supporting equality testing) identifying the
+    current version of the ABC cache for virtual subclasses. The token changes
+    with every call to ``register()`` on any ABC.
+    """
+    return ABCMeta._abc_invalidation_counter
+
+
\ No newline at end of file
diff --git a/docs/_build/html/_modules/index.html b/docs/_build/html/_modules/index.html
new file mode 100644
index 00000000..9018e034
--- /dev/null
+++ b/docs/_build/html/_modules/index.html
@@ -0,0 +1,99 @@
+<!-- Sphinx-generated page "Overview: module code — Tigramite 5.0 documentation" (HTML boilerplate omitted) -->
\ No newline at end of file
diff --git a/docs/_build/html/_modules/tigramite/causal_effects.html b/docs/_build/html/_modules/tigramite/causal_effects.html
new file mode 100644
index 00000000..ef59180d
--- /dev/null
+++ b/docs/_build/html/_modules/tigramite/causal_effects.html
@@ -0,0 +1,2777 @@
+<!-- Sphinx-generated page "tigramite.causal_effects — Tigramite 5.0 documentation"; HTML boilerplate omitted, rendered source listing follows -->

Source code for tigramite.causal_effects

+"""Tigramite causal discovery for time series."""
+
+# Author: Jakob Runge <jakob@jakob-runge.com>
+#
+# License: GNU General Public License v3.0
+
+import numpy as np
+import itertools
+from copy import deepcopy
+from collections import defaultdict
+from tigramite.models import Models
+
+
[docs]class CausalEffects(): + r"""General linear/nonparametric (conditional) causal effect analysis. + + Handles the estimation of causal effects given a causal graph. Various + graph types are supported. + + STILL IN DEVELOPMENT! + + See the corresponding tigramite tutorial for an in-depth introduction. + + Parameters + ---------- + graph : array of either shape [N, N], [N, N, tau_max+1], + or [N, N, tau_max+1, tau_max+1] + Different graph types are supported, see tutorial. + X : list of tuples + List of tuples [(i, -tau), ...] containing cause variables. + Y : list of tuples + List of tuples [(j, 0), ...] containing effect variables. + S : list of tuples + List of tuples [(i, -tau), ...] containing conditioned variables. + graph_type : str + Type of graph. + hidden_variables : list + Hidden variables. The internal graph is constructed by marginalization. + check_SM_overlap : bool + Whether to check whether S overlaps with M. + verbosity : int, optional (default: 0) + Level of verbosity. + """ + + def __init__(self, + graph, + graph_type, + X, + Y, + S=None, + hidden_variables=None, + check_SM_overlap=True, + verbosity=0): + + self.verbosity = verbosity + + supported_graphs = ['dag', + 'admg', + 'tsg_dag', + 'tsg_admg', + 'stationary_dag', + # 'stationary_admg', + + # 'mag', + # 'tsg_mag', + # 'stationary_mag', + # 'pag', + # 'tsg_pag', + # 'stationary_pag', + ] + + # Maybe not needed... + # self.ignore_time_bounds = False + + self.N = graph.shape[0] + + if S is None: + S = [] + + X = set(X) + Y = set(Y) + S = set(S) + + # + # Checks regarding graph type + # + if graph_type not in supported_graphs: + raise ValueError("Only graph types %s supported!" %supported_graphs) + + # TODO: check that masking aligns with hidden samples in variables + if hidden_variables is None: + hidden_variables = [] + + + self.hidden_variables = set(hidden_variables) + if len(self.hidden_variables.intersection(X.union(Y).union(S))) > 0: + raise ValueError("XYS overlaps with hidden_variables!") + + + self.X = X + self.Y = Y + self.S = S + + if 'pag' in graph_type: + self.possible = True + self.definite_status = True + else: + self.possible = False + self.definite_status = False + + (self.graph, self.graph_type, + self.tau_max, self.hidden_variables) = self._construct_graph( + graph=graph, graph_type=graph_type, + hidden_variables=hidden_variables) + + # print(self.graph.shape) + self._check_graph(self.graph) + + anc_Y = self._get_ancestors(Y) + + # If X is not in anc(Y), then no causal link exists + if anc_Y.intersection(set(X)) == set(): + raise ValueError("No causal path from X to Y exists.") + + + + # Get mediators + mediators = self.get_mediators(start=self.X, end=self.Y) + + M = set(mediators) + self.M = M + + for varlag in X.union(Y).union(S): + if abs(varlag[1]) > self.tau_max: + raise ValueError("X, Y, S must have time lags inside graph.") + + if len(self.X.intersection(self.Y)) > 0: + raise ValueError("Overlap between X and Y") + + if len(S.intersection(self.Y.union(self.X))) > 0: + raise ValueError("Conditions S overlap with X or Y") + + # # TODO: need to prove that this is sufficient for non-identifiability! 
+ # if len(self.X.intersection(self._get_descendants(self.M))) > 0: + # raise ValueError("Not identifiable: Overlap between X and des(M)") + + if check_SM_overlap and len(self.S.intersection(self.M)) > 0: + raise ValueError("Conditions S overlap with mediators M!") + + descendants = self._get_descendants(self.Y.union(self.M)) + + # Remove X and descendants of YM + self.forbidden_nodes = descendants.union(self.X) #.union(S) + + self.vancs = self._get_ancestors(list(self.X.union(self.Y).union(self.S))) - self.forbidden_nodes + + if len(self.S.intersection(self._get_descendants(self.X))) > 0: + if self.verbosity > 0: + print("Potentially outside assumptions: Conditions S overlap with des(X)") + + if len(self.S.intersection(self._get_descendants(self.Y))) > 0: + raise ValueError("Not identifiable: Conditions S overlap with des(Y)") + + self.listX = list(self.X) + self.listY = list(self.Y) + self.listS = list(self.S) + + if self.verbosity > 0: + print("\n##\n## Initializing CausalEffects class\n##" + "\n\nInput:") + print("\ngraph_type = %s" % graph_type + + "\nX = %s" % self.listX + + "\nY = %s" % self.listY + + "\nS = %s" % self.listS + + "\nM = %s" % list(self.M) + ) + if len(self.hidden_variables) > 0: + print("\nhidden_variables = %s" % self.hidden_variables + ) + print("\n\n") + + + def _construct_graph(self, graph, graph_type, hidden_variables): + """Construct internal graph object based on input graph and hidden variables.""" + + + if graph_type in ['dag', 'admg']: + tau_max = 0 + if graph.ndim != 2: + raise ValueError("graph_type in ['dag', 'admg'] assumes graph.shape=(N, N).") + # Convert to shape [N, N, 1, 1] with dummy dimension + # to process as tsg_dag or tsg_admg with potential hidden variables + self.graph = np.expand_dims(graph, axis=(2, 3)) + self.tau_max = 0 + + if len(hidden_variables) > 0: + graph = self._get_latent_projection_graph() # stationary=False) + graph_type = "tsg_admg" + else: + graph = self.graph + graph_type = 'tsg_' + graph_type + + elif graph_type in ['tsg_dag', 'tsg_admg']: + if graph.ndim != 4: + raise ValueError("tsg-graph_type assumes graph.shape=(N, N, tau_max+1, tau_max+1).") + + # Then tau_max is ignored and implicitely derived from + # the dimensions + self.graph = graph + self.tau_max = graph.shape[2] - 1 + + if len(hidden_variables) > 0: + graph = self._get_latent_projection_graph() #, stationary=False) + graph_type = "tsg_admg" + else: + graph_type = graph_type + + elif graph_type in ['stationary_dag']: + # Currently on stationary_dag without hidden variables is supported + if graph.ndim != 3: + raise ValueError("stationary graph_type assumes graph.shape=(N, N, tau_max+1).") + # TODO: remove if theory for stationary ADMGs is clear + if graph_type == 'stationary_dag' and len(hidden_variables) > 0: + raise ValueError("Hidden variables currently not supported for " + "stationary_dag.") + + # For a stationary DAG without hidden variables it's sufficient to consider + # a tau_max that includes the parents of X, Y, M, and S. A conservative + # estimate thereof is simply the lag-dimension of the stationary DAG plus + # the maximum lag of X,S. 
+ statgraph_tau_max = graph.shape[2] - 1 + maxlag_XS = 0 + for varlag in self.X.union(self.S): + maxlag_XS = max(maxlag_XS, abs(varlag[1])) + + tau_max = maxlag_XS + statgraph_tau_max + + stat_graph = deepcopy(graph) + + # Construct tsg_graph + graph = np.zeros((self.N, self.N, tau_max + 1, tau_max + 1), dtype='<U3') + graph[:] = "" + for (i, j) in itertools.product(range(self.N), range(self.N)): + for jt, tauj in enumerate(range(0, tau_max + 1)): + for it, taui in enumerate(range(tauj, tau_max + 1)): + tau = abs(taui - tauj) + if tau == 0 and j == i: + continue + if tau > statgraph_tau_max: + continue + + # if tau == 0: + # if stat_graph[i, j, tau] == '-->': + # graph[i, j, taui, tauj] = "-->" + # graph[j, i, tauj, taui] = "<--" + + # # elif stat_graph[i, j, tau] == '<--': + # # graph[i, j, taui, tauj] = "<--" + # # graph[j, i, tauj, taui] = "-->" + # else: + if stat_graph[i, j, tau] == '-->': + graph[i, j, taui, tauj] = "-->" + graph[j, i, tauj, taui] = "<--" + + # elif stat_graph[i, j, tau] == '<--': + # graph[i, j, taui, tauj] = "<--" + # graph[j, i, tauj, taui] = "-->" + + graph_type = 'tsg_dag' + + return (graph, graph_type, tau_max, hidden_variables) + + # max_lag = self._get_maximum_possible_lag(XYZ=list(X.union(Y).union(S)), graph=graph) + + # stat_mediators = self._get_mediators_stationary_graph(start=X, end=Y, max_lag=max_lag) + # self.tau_max = self._get_maximum_possible_lag(XYZ=list(X.union(Y).union(S).union(stat_mediators)), graph=graph) + # self.tau_max = graph_taumax + # for varlag in X.union(Y).union(S): + # self.tau_max = max(self.tau_max, abs(varlag[1])) + + # if verbosity > 0: + # print("Setting tau_max = ", self.tau_max) + + # if tau_max is None: + # self.tau_max = graph_taumax + # for varlag in X.union(Y).union(S): + # self.tau_max = max(self.tau_max, abs(varlag[1])) + + # if verbosity > 0: + # print("Setting tau_max = ", self.tau_max) + # else: + # self.tau_max = graph_taumax + # # Repeat hidden variable pattern + # # if larger tau_max is given + # if self.tau_max > graph_taumax: + # for lag in range(graph_taumax + 1, self.tau_max + 1): + # for j in range(self.N): + # if (j, -(lag % (graph_taumax+1))) in self.hidden_variables: + # self.hidden_variables.add((j, -lag)) + # print(self.hidden_variables) + + # self.graph = self._get_latent_projection_graph(self.graph, stationary=True) + # self.graph_type = "tsg_admg" + # else: + + +
[docs] def check_XYS_paths(self): + """Check whether one can remove nodes from X and Y with no proper causal paths. + + Returns + ------- + X, Y : cleaned lists of X and Y with irrelevant nodes removed. + """ + + # TODO: Also check S... + oldX = self.X.copy() + oldY = self.Y.copy() + + anc_Y = self._get_ancestors(self.Y) + anc_S = self._get_ancestors(self.S) + + # Remove first from X those nodes with no causal path to Y or S + X = set([x for x in self.X if x in anc_Y.union(anc_S)]) + + # Remove from Y those nodes with no causal path from X + des_X = self._get_descendants(X) + + Y = set([y for y in self.Y if y in des_X]) + + # Also require that all x in X have proper path to Y or S, + # that is, the first link goes out of x + # and into path nodes + mediators_S = self.get_mediators(start=self.X, end=self.S) + path_nodes = list(self.M.union(Y).union(mediators_S)) + X = X.intersection(self._get_all_parents(path_nodes)) + + if set(oldX) != set(X) and self.verbosity > 0: + print("Consider pruning X = %s to X = %s " %(oldX, X) + + "since only these have causal path to Y") + + if set(oldY) != set(Y) and self.verbosity > 0: + print("Consider pruning Y = %s to Y = %s " %(oldY, Y) + + "since only these have causal path from X") + + return (list(X), list(Y))
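+    # Minimal usage sketch, assuming a CausalEffects instance `causal_effects`
+    # constructed as in the __main__ example at the bottom of this module:
+    #
+    #     X_pruned, Y_pruned = causal_effects.check_XYS_paths()
+    #
+    # Nodes in X without a proper causal path to Y (and nodes in Y not reached
+    # from X) are candidates for pruning before re-running the analysis.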
+ + + def _check_graph(self, graph): + """Checks that graph contains no invalid entries/structure. + + Assumes graph.shape = (N, N, tau_max+1, tau_max+1) + """ + + allowed_edges = ["-->", "<--"] + if 'admg' in self.graph_type: + allowed_edges += ["<->", "<-+", "+->"] + elif 'mag' in self.graph_type: + allowed_edges += ["<->"] + elif 'pag' in self.graph_type: + allowed_edges += ["<->", "o-o", "o->", "<-o"] # "o--", + # "--o", + # "x-o", + # "o-x", + # "x--", + # "--x", + # "x->", + # "<-x", + # "x-x", + # ] + + graph_dict = defaultdict(list) + for i, j, taui, tauj in zip(*np.where(graph)): + edge = graph[i, j, taui, tauj] + # print((i, -taui), edge, (j, -tauj), graph[j, i, tauj, taui]) + if edge != self._reverse_link(graph[j, i, tauj, taui]): + raise ValueError( + "graph needs to have consistent edges (eg" + " graph[i,j,taui,tauj]='-->' requires graph[j,i,tauj,taui]='<--')" + ) + + if edge not in allowed_edges: + raise ValueError("Invalid graph edge %s." %(edge)) + + if edge == "-->" or edge == "+->": + # Map to (i,-taui, j, tauj) graph + indexi = i * (self.tau_max + 1) + taui + indexj = j * (self.tau_max + 1) + tauj + + graph_dict[indexj].append(indexi) + + # Check for cycles + if self._check_cyclic(graph_dict): + raise ValueError("graph is cyclic.") + + # if MAG: check for almost cycles + # if PAG??? + + def _check_cyclic(self, graph_dict): + """Return True if the graph_dict has a cycle. + graph_dict must be represented as a dictionary mapping vertices to + iterables of neighbouring vertices. For example: + + >>> cyclic({1: (2,), 2: (3,), 3: (1,)}) + True + >>> cyclic({1: (2,), 2: (3,), 3: (4,)}) + False + + """ + path = set() + visited = set() + + def visit(vertex): + if vertex in visited: + return False + visited.add(vertex) + path.add(vertex) + for neighbour in graph_dict.get(vertex, ()): + if neighbour in path or visit(neighbour): + return True + path.remove(vertex) + return False + + return any(visit(v) for v in graph_dict) + +
[docs] def get_mediators(self, start, end): + """Returns mediator variables on proper causal paths from X to Y""" + + des_X = self._get_descendants(start) + + mediators = set() + + # Walk along proper causal paths backwards from Y to X + potential_mediators = set() + for y in end: + j, tau = y + this_level = [y] + while len(this_level) > 0: + next_level = [] + for varlag in this_level: + for parent in self._get_parents(varlag): + i, tau = parent + if (parent in des_X + and parent not in mediators + # and parent not in potential_mediators + and parent not in start + and parent not in end + and (-self.tau_max <= tau <= 0)): # or self.ignore_time_bounds)): + mediators = mediators.union(set([parent])) + next_level.append(parent) + + this_level = next_level + + return mediators
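+    # Minimal usage sketch, assuming a CausalEffects instance `causal_effects`:
+    #
+    #     M = causal_effects.get_mediators(start=causal_effects.X,
+    #                                      end=causal_effects.Y)
+    #
+    # The result is a set of (variable, -lag) tuples lying on proper causal
+    # paths from X to Y; __init__ stores the same set as self.M.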
+ + def _get_mediators_stationary_graph(self, start, end, max_lag): + """Returns mediator variables on proper causal paths + from X to Y in a stationary graph.""" + + des_X = self._get_descendants_stationary_graph(start, max_lag) + + mediators = set() + + # Walk along proper causal paths backwards from Y to X + potential_mediators = set() + for y in end: + j, tau = y + this_level = [y] + while len(this_level) > 0: + next_level = [] + for varlag in this_level: + for _, parent in self._get_adjacents_stationary_graph(graph=self.graph, + node=varlag, patterns="<*-", max_lag=max_lag, exclude=None): + i, tau = parent + if (parent in des_X + and parent not in mediators + # and parent not in potential_mediators + and parent not in start + and parent not in end + # and (-self.tau_max <= tau <= 0 or self.ignore_time_bounds) + ): + mediators = mediators.union(set([parent])) + next_level.append(parent) + + this_level = next_level + + return mediators + + def _reverse_link(self, link): + """Reverse a given link, taking care to replace > with < and vice versa""" + + if link == "": + return "" + + if link[2] == ">": + left_mark = "<" + else: + left_mark = link[2] + + if link[0] == "<": + right_mark = ">" + else: + right_mark = link[0] + + return left_mark + link[1] + right_mark + + def _match_link(self, pattern, link): + """Matches pattern including wildcards with link. + + In an ADMG we have edge types ["-->", "<--", "<->", "+->", "<-+"]. + Here +-> corresponds to having both "-->" and "<->". + + In a MAG we have edge types ["-->", "<--", "<->", "---"]. + """ + + if pattern == '' or link == '': + return True if pattern == link else False + else: + left_mark, middle_mark, right_mark = pattern + if left_mark != '*': + if link[0] != '+': + if link[0] != left_mark: return False + + if right_mark != '*': + if link[2] != '+': + if link[2] != right_mark: return False + + if middle_mark != '*' and link[1] != middle_mark: return False + + return True + + def _find_adj(self, node, patterns, exclude=None, return_link=False): + """Find adjacencies of node matching patterns.""" + + graph = self.graph + + if exclude is None: + exclude = [] + # exclude = self.hidden_variables + # else: + # exclude = set(exclude).union(self.hidden_variables) + + # Setup + i, lag_i = node + lag_i = abs(lag_i) + + if exclude is None: exclude = [] + if type(patterns) == str: + patterns = [patterns] + + # Init + adj = [] + # Find adjacencies going forward/contemp + for k, lag_ik in zip(*np.where(graph[i,:,lag_i,:])): + # print((k, lag_ik), graph[i,k,lag_i,lag_ik]) + matches = [self._match_link(patt, graph[i,k,lag_i,lag_ik]) for patt in patterns] + if np.any(matches): + match = (k, -lag_ik) + if match not in exclude: + if return_link: + adj.append((graph[i,k,lag_i,lag_ik], match)) + else: + adj.append(match) + + + # Find adjacencies going backward/contemp + for k, lag_ki in zip(*np.where(graph[:,i,:,lag_i])): + # print((k, lag_ki), graph[k,i,lag_ki,lag_i]) + matches = [self._match_link(self._reverse_link(patt), graph[k,i,lag_ki,lag_i]) for patt in patterns] + if np.any(matches): + match = (k, -lag_ki) + if match not in exclude: + if return_link: + adj.append((self._reverse_link(graph[k,i,lag_ki,lag_i]), match)) + else: + adj.append(match) + + adj = list(set(adj)) + return adj + + def _is_match(self, nodei, nodej, pattern_ij): + """Check whether the link between X and Y agrees with pattern_ij""" + + graph = self.graph + + (i, lag_i) = nodei + (j, lag_j) = nodej + tauij = lag_j - lag_i + if abs(tauij) >= graph.shape[2]: + return False + 
return ((tauij >= 0 and self._match_link(pattern_ij, graph[i, j, tauij])) or + (tauij < 0 and self._match_link(self._reverse_link(pattern_ij), graph[j, i, abs(tauij)]))) + + + def _get_children(self, varlag): + """Returns set of children (varlag --> ...) for (lagged) varlag.""" + if self.possible: + patterns=['-*>', 'o*o', 'o*>'] + else: + patterns=['-*>'] + return self._find_adj(node=varlag, patterns=patterns) + + def _get_parents(self, varlag): + """Returns set of parents (varlag <-- ...)) for (lagged) varlag.""" + if self.possible: + patterns=['<*-', 'o*o', '<*o'] + else: + patterns=['<*-'] + return self._find_adj(node=varlag, patterns=patterns) + + def _get_spouses(self, varlag): + """Returns set of spouses (varlag <-> ...)) for (lagged) varlag.""" + return self._find_adj(node=varlag, patterns=['<*>']) + + def _get_neighbors(self, varlag): + """Returns set of neighbors (varlag --- ...)) for (lagged) varlag.""" + return self._find_adj(node=varlag, patterns=['-*-']) + + def _get_ancestors(self, W): + """Get ancestors of nodes in W up to time tau_max. + + Includes the nodes themselves. + """ + + ancestors = set(W) + + for w in W: + j, tau = w + this_level = [w] + while len(this_level) > 0: + next_level = [] + for varlag in this_level: + + for par in self._get_parents(varlag): + i, tau = par + if par not in ancestors and -self.tau_max <= tau <= 0: + ancestors = ancestors.union(set([par])) + next_level.append(par) + + this_level = next_level + + return ancestors + + def _get_all_parents(self, W): + """Get parents of nodes in W up to time tau_max. + + Includes the nodes themselves. + """ + + parents = set(W) + + for w in W: + j, tau = w + for par in self._get_parents(w): + i, tau = par + if par not in parents and -self.tau_max <= tau <= 0: + parents = parents.union(set([par])) + + return parents + + def _get_all_spouses(self, W): + """Get spouses of nodes in W up to time tau_max. + + Includes the nodes themselves. + """ + + spouses = set(W) + + for w in W: + j, tau = w + for spouse in self._get_spouses(w): + i, tau = spouse + if spouse not in spouses and -self.tau_max <= tau <= 0: + spouses = spouses.union(set([spouse])) + + return spouses + + def _get_descendants_stationary_graph(self, W, max_lag): + """Get descendants of nodes in W up to time t. + + Includes the nodes themselves. + """ + + descendants = set(W) + + for w in W: + j, tau = w + this_level = [w] + while len(this_level) > 0: + next_level = [] + for varlag in this_level: + for _, child in self._get_adjacents_stationary_graph(graph=self.graph, + node=varlag, patterns="-*>", max_lag=max_lag, exclude=None): + i, tau = child + if (child not in descendants + # and (-self.tau_max <= tau <= 0 or self.ignore_time_bounds) + ): + descendants = descendants.union(set([child])) + next_level.append(child) + + this_level = next_level + + return descendants + + def _get_descendants(self, W): + """Get descendants of nodes in W up to time t. + + Includes the nodes themselves. + """ + + descendants = set(W) + + for w in W: + j, tau = w + this_level = [w] + while len(this_level) > 0: + next_level = [] + for varlag in this_level: + for child in self._get_children(varlag): + i, tau = child + if (child not in descendants + and (-self.tau_max <= tau <= 0)): # or self.ignore_time_bounds)): + descendants = descendants.union(set([child])) + next_level.append(child) + + this_level = next_level + + return descendants + + def _get_collider_path_nodes(self, W, descendants): + """Get non-descendant collider path nodes of nodes in W up to time t. 
+ + """ + + collider_path_nodes = set([]) + # print("descendants ", descendants) + for w in W: + # print(w) + j, tau = w + this_level = [w] + while len(this_level) > 0: + next_level = [] + for varlag in this_level: + # print("\t", varlag, self._get_spouses(varlag)) + for spouse in self._get_spouses(varlag): + # print("\t\t", spouse) + i, tau = spouse + if (spouse not in collider_path_nodes + and spouse not in descendants + and (-self.tau_max <= tau <= 0)): # or self.ignore_time_bounds)): + collider_path_nodes = collider_path_nodes.union(set([spouse])) + next_level.append(spouse) + + this_level = next_level + + # Add parents + for w in collider_path_nodes: + for par in self._get_parents(w): + if (par not in collider_path_nodes + and par not in descendants + and (-self.tau_max <= tau <= 0)): # or self.ignore_time_bounds)): + collider_path_nodes = collider_path_nodes.union(set([par])) + + return collider_path_nodes + + def _get_adjacents_stationary_graph(self, graph, node, patterns, + max_lag=0, exclude=None): + """Find adjacencies of node matching patterns.""" + + # graph = self.graph + + # Setup + i, lag_i = node + if exclude is None: exclude = [] + if type(patterns) == str: + patterns = [patterns] + + # Init + adj = [] + + # Find adjacencies going forward/contemp + for k, lag_ik in zip(*np.where(graph[i,:,:])): + matches = [self._match_link(patt, graph[i, k, lag_ik]) for patt in patterns] + if np.any(matches): + match = (k, lag_i + lag_ik) + if (k, lag_i + lag_ik) not in exclude and (-max_lag <= lag_i + lag_ik <= 0): # or self.ignore_time_bounds): + adj.append((graph[i, k, lag_ik], match)) + + # Find adjacencies going backward/contemp + for k, lag_ki in zip(*np.where(graph[:,i,:])): + matches = [self._match_link(self._reverse_link(patt), graph[k, i, lag_ki]) for patt in patterns] + if np.any(matches): + match = (k, lag_i - lag_ki) + if (k, lag_i - lag_ki) not in exclude and (-max_lag <= lag_i - lag_ki <= 0): # or self.ignore_time_bounds): + adj.append((self._reverse_link(graph[k, i, lag_ki]), match)) + + adj = list(set(adj)) + return adj + + def _get_canonical_dag_from_graph(self, graph): + """ + Constructs links_coeffs dictionary, observed_vars, + and selection_vars from graph array (MAG or DAG). + + For every <-> link further latent variables are added. + This corresponds to a canonical DAG (Richardson Spirtes 2002). + + Can be used to evaluate d-separation. 
+ + """ + + N, N, tau_maxplusone = graph.shape + tau_max = tau_maxplusone - 1 + + links = {j: [] for j in range(N)} + + # Add further latent variables to accommodate <-> links + latent_index = N + for i, j, tau in zip(*np.where(graph)): + + edge_type = graph[i, j, tau] + + # Consider contemporaneous links only once + if tau == 0 and j > i: + continue + + if edge_type == "-->": + links[j].append((i, -tau)) + elif edge_type == "<--": + links[i].append((j, -tau)) + elif edge_type == "<->": + links[latent_index] = [] + links[i].append((latent_index, 0)) + links[j].append((latent_index, -tau)) + latent_index += 1 + # elif edge_type == "---": + # links[latent_index] = [] + # selection_vars.append(latent_index) + # links[latent_index].append((i, -tau)) + # links[latent_index].append((j, 0)) + # latent_index += 1 + elif edge_type == "+->": + links[j].append((i, -tau)) + links[latent_index] = [] + links[i].append((latent_index, 0)) + links[j].append((latent_index, -tau)) + latent_index += 1 + elif edge_type == "<-+": + links[i].append((j, -tau)) + links[latent_index] = [] + links[i].append((latent_index, 0)) + links[j].append((latent_index, -tau)) + latent_index += 1 + + return links + + + def _get_maximum_possible_lag(self, XYZ, graph): + """Expects graph to be stationary type. See Thm. XXXX""" + + def _repeating(link, seen_path): + """Returns True if a link or its time-shifted version is already + included in seen_links.""" + i, taui = link[0] + j, tauj = link[1] + + for index, seen_link in enumerate(seen_path[:-1]): + seen_i, seen_taui = seen_link + seen_j, seen_tauj = seen_path[index + 1] + + if (i == seen_i and j == seen_j + and abs(tauj-taui) == abs(seen_tauj-seen_taui)): + return True + + return False + + # TODO: does this work with PAGs? + # if self.possible: + # patterns=['<*-', '<*o', 'o*o'] + # else: + # patterns=['<*-'] + + canonical_dag_links = self._get_canonical_dag_from_graph(graph) + + max_lag = 0 + for node in XYZ: + j, tau = node # tau <= 0 + max_lag = max(max_lag, abs(tau)) + + causal_path = [] + queue = [(node, causal_path)] + + while queue: + varlag, causal_path = queue.pop() + causal_path = [varlag] + causal_path + + var, lag = varlag + for partmp in canonical_dag_links[var]: + i, tautmp = partmp + # Get shifted lag since canonical_dag_links is at t=0 + tau = tautmp + lag + par = (i, tau) + + if (par not in causal_path): + + if len(causal_path) == 1: + queue.append((par, causal_path)) + continue + + if (len(causal_path) > 1) and not _repeating((par, varlag), causal_path): + + max_lag = max(max_lag, abs(tau)) + queue.append((par, causal_path)) + + return max_lag + + def _get_latent_projection_graph(self, stationary=False): + """For DAGs/ADMGs uses the Latent projection operation (Pearl 2009). + + Assumes a normal or stationary graph with potentially unobserved nodes. + Also allows particular time steps to be unobserved. By stationarity + that pattern ob unobserved nodes is repeated into -infinity. + + Latent projection operation for latents = nodes before t-tau_max or due to <->: + (i) auxADMG contains (i, -taui) --> (j, -tauj) iff there is a directed path + (i, -taui) --> ... --> (j, -tauj) on which + every non-endpoint vertex is in hidden variables (= not in observed_vars) + here iff (i, -|taui-tauj|) --> j in graph + (ii) auxADMG contains (i, -taui) <-> (j, -tauj) iff there exists a path of the + form (i, -taui) <-- ... 
--> (j, -tauj) on + which every non-endpoint vertex is non-collider AND in L (=not in observed_vars) + here iff (i, -|taui-tauj|) <-> j OR there is path + (i, -taui) <-- nodes before t-tau_max --> (j, -tauj) + """ + + # graph = self.graph + + # if self.hidden_variables is None: + # hidden_variables_here = [] + # else: + hidden_variables_here = self.hidden_variables + + aux_graph = np.zeros((self.N, self.N, self.tau_max + 1, self.tau_max + 1), dtype='<U3') + aux_graph[:] = "" + for (i, j) in itertools.product(range(self.N), range(self.N)): + for jt, tauj in enumerate(range(0, self.tau_max + 1)): + for it, taui in enumerate(range(0, self.tau_max + 1)): + tau = abs(taui - tauj) + if tau == 0 and j == i: + continue + if (i, -taui) in hidden_variables_here or (j, -tauj) in hidden_variables_here: + continue + # print("\n") + # print((i, -taui), (j, -tauj)) + + cond_i_xy = ( + # tau <= graph_taumax + # and (graph[i, j, tau] == '-->' or graph[i, j, tau] == '+->') + # ) + # and + self._check_path( #graph=graph, + start=[(i, -taui)], + end=[(j, -tauj)], + conditions=None, + starts_with='-*>', + ends_with='-*>', + path_type='causal', + hidden_by_taumax=False, + hidden_variables=hidden_variables_here, + stationary_graph=stationary, + )) + cond_i_yx = ( + # tau <= graph_taumax + # and (graph[i, j, tau] == '<--' or graph[i, j, tau] == '<-+') + # ) + # and + self._check_path( #graph=graph, + start=[(j, -tauj)], + end=[(i, -taui)], + conditions=None, + starts_with='-*>', + ends_with='-*>', + path_type='causal', + hidden_by_taumax=False, + hidden_variables=hidden_variables_here, + stationary_graph=stationary, + )) + if stationary: + hidden_by_taumax_here = True + else: + hidden_by_taumax_here = False + cond_ii = ( + # tau <= graph_taumax + # and + ( + # graph[i, j, tau] == '<->' + # or graph[i, j, tau] == '+->' or graph[i, j, tau] == '<-+')) + self._check_path( #graph=graph, + start=[(i, -taui)], + end=[(j, -tauj)], + conditions=None, + starts_with='<**', + ends_with='**>', + path_type='any', + hidden_by_taumax=hidden_by_taumax_here, + hidden_variables=hidden_variables_here, + stationary_graph=stationary, + ))) + + # print((i, -taui), (j, -tauj), cond_i_xy, cond_i_yx, cond_ii) + + if cond_i_xy and not cond_i_yx and not cond_ii: + aux_graph[i, j, taui, tauj] = "-->" #graph[i, j, tau] + # if tau == 0: + aux_graph[j, i, tauj, taui] = "<--" # graph[j, i, tau] + elif not cond_i_xy and cond_i_yx and not cond_ii: + aux_graph[i, j, taui, tauj] = "<--" #graph[i, j, tau] + # if tau == 0: + aux_graph[j, i, tauj, taui] = "-->" # graph[j, i, tau] + elif not cond_i_xy and not cond_i_yx and cond_ii: + aux_graph[i, j, taui, tauj] = '<->' + # if tau == 0: + aux_graph[j, i, tauj, taui] = '<->' + elif cond_i_xy and not cond_i_yx and cond_ii: + aux_graph[i, j, taui, tauj] = '+->' + # if tau == 0: + aux_graph[j, i, tauj, taui] = '<-+' + elif not cond_i_xy and cond_i_yx and cond_ii: + aux_graph[i, j, taui, tauj] = '<-+' + # if tau == 0: + aux_graph[j, i, tauj, taui] = '+->' + elif cond_i_xy and cond_i_yx: + raise ValueError("Cycle between %s and %s!" %(str(i, -taui), str(j, -tauj))) + # print(aux_graph[i, j, taui, tauj]) + + return aux_graph + + def _check_path(self, + # graph, + start, end, + conditions=None, + starts_with=None, + ends_with=None, + path_type='any', + # causal_children=None, + stationary_graph=False, + hidden_by_taumax=False, + hidden_variables=None, + ): + """ + + Includes checks of the optimality-theorem. 
Cond1-related checks test the existence of + a collider path, COnd2-related checks the negation of a certain path as stated + + """ + + # assert not (check_optimality_path == True and only_collider_paths == True) + + if conditions is None: + conditions = set([]) + # if conditioned_variables is None: + # S = [] + + start = set(start) + end = set(end) + conditions = set(conditions) + + # Get maximal possible time lag of a connecting path + # See Thm. XXXX + XYZ = start.union(end).union(conditions) + if stationary_graph: + max_lag = self._get_maximum_possible_lag(XYZ, self.graph) + causal_children = list(self._get_mediators_stationary_graph(start, end, max_lag).union(end)) + else: + max_lag = None + causal_children = list(self.get_mediators(start, end).union(end)) + + # if hidden_variables is None: + # hidden_variables = set([]) + + if hidden_by_taumax: + if hidden_variables is None: + hidden_variables = set([]) + hidden_variables = hidden_variables.union([(k, -tauk) for k in range(self.N) + for tauk in range(self.tau_max+1, max_lag + 1)]) + + # print("hidden_variables ", hidden_variables) + if starts_with is None: + starts_with = '***' + + if ends_with is None: + ends_with = '***' + + # + # Breadth-first search to find connection + # + # print("\nstart, starts_with, ends_with, end ", start, starts_with, ends_with, end) + # print("hidden_variables ", hidden_variables) + start_from = set() + for x in start: + if stationary_graph: + link_neighbors = self._get_adjacents_stationary_graph(graph=self.graph, node=x, patterns=starts_with, + max_lag=max_lag, exclude=list(start)) + else: + link_neighbors = self._find_adj(node=x, patterns=starts_with, exclude=list(start), return_link=True) + + # print("link_neighbors ", link_neighbors) + for link_neighbor in link_neighbors: + link, neighbor = link_neighbor + + # if before_taumax and neighbor[1] >= -self.tau_max: + # continue + + if (hidden_variables is not None and neighbor not in end + and neighbor not in hidden_variables): + continue + + if path_type == 'non_causal': + # By amenability every proper possibly directed causal path starts with -*> + if (neighbor in causal_children and self._match_link('-*>', link) + and not self._match_link('+*>', link)): + continue + elif path_type == 'causal': + if (neighbor not in causal_children or self._match_link('<**', link)): + continue + # start_from.add((link, neighbor)) + start_from.add((x, link, neighbor)) + + # print("start, end, start_from ", start, end, start_from) + + visited = set() + for (varlag_i, link_ik, varlag_k) in start_from: + visited.add((link_ik, varlag_k)) + + # Traversing through motifs i *-* k *-* j + while start_from: + + # print("Continue ", start_from) + # for (link_ik, varlag_k) in start_from: + removables = [] + for (varlag_i, link_ik, varlag_k) in start_from: + + # print("varlag_k in end ", varlag_k in end, link_ik) + if varlag_k in end: + if self._match_link(ends_with, link_ik): + # print("Connected ", varlag_i, link_ik, varlag_k) + return True + else: + removables.append((varlag_i, link_ik, varlag_k)) + + for removable in removables: + start_from.remove(removable) + if len(start_from)==0: + return False + + # Get any neighbor from starting nodes + # link_ik, varlag_k = start_from.pop() + varlag_i, link_ik, varlag_k = start_from.pop() + + # print("Get k = ", link_ik, varlag_k) + # print("start_from ", start_from) + # print("visited ", visited) + + if stationary_graph: + link_neighbors = self._get_adjacents_stationary_graph(graph=self.graph, node=varlag_k, patterns='***', + 
max_lag=max_lag, exclude=list(start)) + else: + link_neighbors = self._find_adj(node=varlag_k, patterns='***', exclude=list(start), return_link=True) + + # print("link_neighbors ", link_neighbors) + for link_neighbor in link_neighbors: + link_kj, varlag_j = link_neighbor + # print("Walk ", link_ik, varlag_k, link_kj, varlag_j) + + # print ("visited ", (link_kj, varlag_j), visited) + if (link_kj, varlag_j) in visited: + # if (varlag_i, link_kj, varlag_j) in visited: + # print("in visited") + continue + # print("Not in visited") + + if path_type == 'causal': + if not self._match_link('-*>', link_kj): + continue + + # If motif i *-* k *-* j is open, + # then add link_kj, varlag_j to visited and start_from + left_mark = link_ik[2] + right_mark = link_kj[0] + # print(left_mark, right_mark) + + if self.definite_status: + # Exclude paths that are not definite_status implying that any of the following + # motifs occurs: + # i *-> k o-* j + if (left_mark == '>' and right_mark == 'o'): + continue + # i *-o k <-* j + if (left_mark == 'o' and right_mark == '<'): + continue + # i *-o k o-* j and i and j are adjacent + if (left_mark == 'o' and right_mark == 'o' + and self._is_match(varlag_i, varlag_j, "***")): + continue + + # If k is in conditions and motif is *-o k o-*, then motif is blocked since + # i and j are non-adjacent due to the check above + if varlag_k in conditions and (left_mark == 'o' and right_mark == 'o'): + # print("Motif closed ", link_ik, varlag_k, link_kj, varlag_j ) + continue # [('>', '<'), ('>', '+'), ('+', '<'), ('+', '+')] + + # If k is in conditions and left or right mark is tail '-', then motif is blocked + if varlag_k in conditions and (left_mark == '-' or right_mark == '-'): + # print("Motif closed ", link_ik, varlag_k, link_kj, varlag_j ) + continue # [('>', '<'), ('>', '+'), ('+', '<'), ('+', '+')] + + # If k is not in conditions and left and right mark are heads '><', then motif is blocked + if varlag_k not in conditions and (left_mark == '>' and right_mark == '<'): + # print("Motif closed ", link_ik, varlag_k, link_kj, varlag_j ) + continue # [('>', '<'), ('>', '+'), ('+', '<'), ('+', '+')] + + # if (before_taumax and varlag_j not in end + # and varlag_j[1] >= -self.tau_max): + # # print("before_taumax ", varlag_j) + # continue + + if (hidden_variables is not None and varlag_j not in end + and varlag_j not in hidden_variables): + continue + + # Motif is open + # print("Motif open ", link_ik, varlag_k, link_kj, varlag_j ) + # start_from.add((link_kj, varlag_j)) + visited.add((link_kj, varlag_j)) + start_from.add((varlag_k, link_kj, varlag_j)) + # visited.add((varlag_k, link_kj, varlag_j)) + + + # print("Separated") + # sys.exit(0) + return False + + +
[docs] def get_optimal_set(self, + alternative_conditions=None, + minimize=False, + return_separate_sets=False, + ): + """Returns optimal adjustment set. + + See Runge NeurIPS 2021. + + Parameters + ---------- + alternative_conditions : set of tuples + Used only internally in optimality theorem. If None, self.S is used. + minimize : {False, True, 'colliders_only'} + Minimize optimal set. If True, minimize such that no subset + can be removed without making it invalid. If 'colliders_only', + only colliders are minimized. + return_separate_sets : bool + Whether to return tuple of parents, colliders, collider_parents, and S. + + Returns + ------- + Oset_S : False or list or tuple of lists + Returns optimal adjustment set if a valid set exists, otherwise False. + """ + + + # Needed for optimality theorem where Osets for alternative S are tested + if alternative_conditions is None: + S = self.S.copy() + vancs = self.vancs.copy() + else: + S = alternative_conditions + vancs = self._get_ancestors(list(self.X.union(self.Y).union(S))) - self.forbidden_nodes + + descendants = self._get_descendants(self.Y.union(self.M)) + + ## + ## Construct O-set + ## + + # Start with parents + parents = self._get_all_parents(self.Y.union(self.M)) # set([]) + + # Remove forbidden nodes + parents = parents - self.forbidden_nodes + + # Construct valid collider path nodes + colliders = set([]) + for w in self.Y.union(self.M): + j, tau = w + this_level = [w] + non_suitable_nodes = [] + while len(this_level) > 0: + next_level = [] + for varlag in this_level: + suitable_spouses = set(self._get_spouses(varlag)) - set(non_suitable_nodes) + for spouse in suitable_spouses: + i, tau = spouse + if spouse in self.X: + return False + + if (# Node not already in set + spouse not in colliders #.union(parents) + # not forbidden + and spouse not in self.forbidden_nodes + # in time bounds + and (-self.tau_max <= tau <= 0) # or self.ignore_time_bounds) + and (spouse in vancs + or not self._check_path(#graph=self.graph, + start=self.X, end=[spouse], + conditions=list(parents.union(vancs)) + list(S), + )) + ): + colliders = colliders.union(set([spouse])) + next_level.append(spouse) + else: + if spouse not in colliders: + non_suitable_nodes.append(spouse) + + + this_level = set(next_level) - set(non_suitable_nodes) + + # Add parents and raise Error if not identifiable + collider_parents = self._get_all_parents(colliders) + if len(self.X.intersection(collider_parents)) > 0: + return False + + colliders_and_their_parents = colliders.union(collider_parents) + + # Add valid collider path nodes and their parents + Oset = parents.union(colliders_and_their_parents) + + + if minimize: + removable = [] + # First remove all those that have no path from X + sorted_Oset = Oset + if minimize == 'colliders_only': + sorted_Oset = [node for node in sorted_Oset if node not in parents] + + for node in sorted_Oset: + if (not self._check_path(#graph=self.graph, + start=self.X, end=[node], + conditions=list(Oset - set([node])) + list(S))): + removable.append(node) + + Oset = Oset - set(removable) + if minimize == 'colliders_only': + sorted_Oset = [node for node in Oset if node not in parents] + + removable = [] + # Next remove all those with no direct connection to Y + for node in sorted_Oset: + if (not self._check_path(#graph=self.graph, + start=[node], end=self.Y, + conditions=list(Oset - set([node])) + list(S) + list(self.X), + ends_with='**>')): + removable.append(node) + + Oset = Oset - set(removable) + + Oset_S = Oset.union(S) + + # For singleton X the 
validity is already checked in the + # if-statements of the construction algorithm, but for + # multivariate X there might be further cases... Hence, + # we here explicitely check validity + if self._check_validity(list(Oset_S)) is False: + return False + + if return_separate_sets: + return parents, colliders, collider_parents, S + else: + return list(Oset_S)
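+    # Minimal usage sketch (see also the __main__ example below):
+    #
+    #     opt = causal_effects.get_optimal_set()
+    #     if opt is False:
+    #         print("No valid adjustment set exists.")
+    #     else:
+    #         print("Optimal adjustment set:", opt)
+    #
+    # With minimize=True nodes are removed as long as the set stays valid;
+    # minimize='colliders_only' restricts the removal to collider nodes.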
+ + + def _get_collider_paths_optimality(self, source_nodes, target_nodes, + condition, + inside_set=None, + start_with_tail_or_head=False, + # possible=False + ): + """Iterates over collider paths within O-set via depth-first search + + """ + + for w in source_nodes: + # Only used to return *all* collider paths + # (needed in optimality theorem) + + coll_path = [] + + queue = [(w, coll_path)] + + non_valid_subsets = [] + + while queue: + + varlag, coll_path = queue.pop() + + coll_path = coll_path + [varlag] + + suitable_nodes = set(self._get_spouses(varlag)) + + if start_with_tail_or_head and coll_path == [w]: + children = set(self._get_children(varlag)) + suitable_nodes = suitable_nodes.union(children) + + for node in suitable_nodes: + i, tau = node + if ((-self.tau_max <= tau <= 0) # or self.ignore_time_bounds) + and node not in coll_path): + + if condition == 'II' and node not in target_nodes and node not in self.vancs: + continue + + if node in inside_set: + if condition == 'I': + non_valid = False + for pathset in non_valid_subsets[::-1]: + if set(pathset).issubset(set(coll_path + [node])): + non_valid = True + break + if non_valid is False: + queue.append((node, coll_path)) + else: + continue + elif condition == 'II': + queue.append((node, coll_path)) + + if node in target_nodes: + # yield coll_path + # collider_paths[node].append(coll_path) + if condition == 'I': + # Construct OπiN + Sprime = self.S.union(coll_path) + OpiN = self.get_optimal_set(alternative_conditions=Sprime) + if OpiN is False: + queue = [(q_node, q_path) for (q_node, q_path) in queue if set(coll_path).issubset(set(q_path + [q_node])) is False] + non_valid_subsets.append(coll_path) + else: + return False + + elif condition == 'II': + return True + # yield coll_path + + if condition == 'I': + return True + elif condition == 'II': + return False + # return collider_paths + + +
[docs] def check_optimality(self): + """Check whether optimal adjustment set exists. + + See Theorem 3 in paper. + + Returns + ------- + optimality : bool + Returns True if optimal adjustment set exists, otherwise False. + """ + + # Cond. 0: Exactly one valid adjustment set exists + cond_0 = (self._get_all_valid_adjustment_sets(check_one_set_exists=True)) + + # + # Cond. I + # + parents, colliders, collider_parents, _ = self.get_optimal_set(return_separate_sets=True) + Oset = parents.union(colliders).union(collider_parents) + n_nodes = self._get_all_spouses(self.Y.union(self.M).union(colliders)) - self.forbidden_nodes - Oset - self.S - self.Y - self.M - colliders + + if (len(n_nodes) == 0): + # # (1) There are no spouses N ∈ sp(YMC) \ (forbOS) + cond_I = True + else: + + # (2) For all N ∈ N and all its collider paths i it holds that + # OπiN does not block all non-causal paths from X to Y + # cond_I = True + cond_I = self._get_collider_paths_optimality( + source_nodes=list(n_nodes), target_nodes=list(self.Y.union(self.M)), + condition='I', + inside_set=Oset.union(self.S), start_with_tail_or_head=False, + ) + + # + # Cond. II + # + e_nodes = Oset.difference(parents) + cond_II = True + for E in e_nodes: + Oset_minusE = Oset.difference(set([E])) + if self._check_path(#graph=self.graph, + start=list(self.X), end=[E], + conditions=list(self.S) + list(Oset_minusE)): + + cond_II = self._get_collider_paths_optimality( + target_nodes=self.Y.union(self.M), + source_nodes=list(set([E])), + condition='II', + inside_set=list(Oset.union(self.S)), + start_with_tail_or_head = True) + + if cond_II is False: + if self.verbosity > 1: + print("Non-optimal due to E = ", E) + break + + # print("Optimality = ", cond_0, cond_I, cond_II) + optimality = (cond_0 or (cond_I and cond_II)) + return optimality
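+    # Minimal usage sketch:
+    #
+    #     if causal_effects.check_optimality():
+    #         Oset = causal_effects.get_optimal_set()
+    #
+    # A return value of True means an optimal adjustment set exists in the
+    # sense of Theorem 3 of the referenced paper; the set itself is obtained
+    # via get_optimal_set().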
+ + def _check_validity(self, Z): + """Checks whether Z is a valid adjustment set.""" + + # causal_children = list(self.M.union(self.Y)) + backdoor_path = self._check_path(#graph=self.graph, + start=list(self.X), end=list(self.Y), + conditions=list(Z), + # causal_children=causal_children, + path_type = 'non_causal') + + if backdoor_path: + return False + else: + return True + + def _get_adjust_set(self, + minimize=False, + ): + """Returns Adjust-set. + + See van der Zander, B.; Liśkiewicz, M. & Textor, J. + Separators and adjustment sets in causal graphs: Complete + criteria and an algorithmic framework + Artificial Intelligence, Elsevier, 2019, 270, 1-40 + + """ + + vancs = self.vancs.copy() + + if minimize: + # Get removable nodes by computing minimal valid set from Z + if minimize == 'keep_parentsYM': + minimize_nodes = vancs - self._get_all_parents(list(self.Y.union(self.M))) + + else: + minimize_nodes = vancs + + # Zprime2 = Zprime + # First remove all nodes that have no unique path to X given Oset + for node in minimize_nodes: + # path = self.oracle.check_shortest_path(X=X, Y=[node], + # Z=list(vancs - set([node])), + # max_lag=None, + # starts_with=None, #'arrowhead', + # forbidden_nodes=None, #list(Zprime - set([node])), + # return_path=False) + path = self._check_path(#graph=self.graph, + start=self.X, end=[node], + conditions=list(vancs - set([node])), + ) + + if path is False: + vancs = vancs - set([node]) + + if minimize == 'keep_parentsYM': + minimize_nodes = vancs - self._get_all_parents(list(self.Y.union(self.M))) + else: + minimize_nodes = vancs + + # print(Zprime2) + # Next remove all nodes that have no unique path to Y given Oset_min + # Z = Zprime2 + for node in minimize_nodes: + + path = self._check_path(#graph=self.graph, + start=[node], end=self.Y, + conditions=list(vancs - set([node])) + list(self.X), + ) + + if path is False: + vancs = vancs - set([node]) + + if self._check_validity(list(vancs)) is False: + return False + else: + return list(vancs) + + + def _get_all_valid_adjustment_sets(self, + check_one_set_exists=False, yield_index=None): + """Constructs all valid adjustment sets or just checks whether one exists. + + See van der Zander, B.; Liśkiewicz, M. & Textor, J. 
+ Separators and adjustment sets in causal graphs: Complete + criteria and an algorithmic framework + Artificial Intelligence, Elsevier, 2019, 270, 1-40 + + """ + + cond_set = set(self.S) + all_vars = [(i, -tau) for i in range(self.N) + for tau in range(0, self.tau_max + 1)] + + all_vars_set = set(all_vars) - self.forbidden_nodes + + + def find_sep(I, R): + Rprime = R - self.X - self.Y + # TODO: anteriors and NOT ancestors where + # anteriors include --- links in causal paths + # print(I) + XYI = list(self.X.union(self.Y).union(I)) + # print(XYI) + ancs = self._get_ancestors(list(XYI)) + Z = ancs.intersection(Rprime) + if self._check_validity(Z) is False: + return False + else: + return Z + + + def list_sep(I, R): + # print(find_sep(X, Y, I, R)) + if find_sep(I, R) is not False: + # print(I,R) + if I == R: + # print('--->', I) + yield I + else: + # Pick arbitrary node from R-I + RminusI = list(R - I) + # print(R, I, RminusI) + v = RminusI[0] + # print("here ", X, Y, I.union(set([v])), R) + yield from list_sep(I.union(set([v])), R) + yield from list_sep(I, R - set([v])) + + # print("all ", X, Y, cond_set, all_vars_set) + all_sets = [] + I = cond_set + R = all_vars_set + for index, valid_set in enumerate(list_sep(I, R)): + # print(valid_set) + all_sets.append(list(valid_set)) + if check_one_set_exists and index > 0: + break + + if yield_index is not None and index == yield_index: + return valid_set + + if yield_index is not None: + return None + + if check_one_set_exists: + if len(all_sets) == 1: + return True + else: + return False + + return all_sets + + + def _get_causal_paths(self, source_nodes, target_nodes, + mediators=None, + mediated_through=None, + proper_paths=True, + ): + """Returns causal paths via depth-first search. + + """ + + source_nodes = set(source_nodes) + target_nodes = set(target_nodes) + + if mediators is None: + mediators = set() + else: + mediators = set(mediators) + + if mediated_through is None: + mediated_through = [] + mediated_through = set(mediated_through) + + if proper_paths: + inside_set = mediators.union(target_nodes) - source_nodes + else: + inside_set = mediators.union(target_nodes).union(source_nodes) + + all_causal_paths = {} + for w in source_nodes: + all_causal_paths[w] = {} + for z in target_nodes: + all_causal_paths[w][z] = [] + + for w in source_nodes: + + causal_path = [] + queue = [(w, causal_path)] + + while queue: + + varlag, causal_path = queue.pop() + causal_path = causal_path + [varlag] + suitable_nodes = set(self._get_children(varlag) + ).intersection(inside_set) + for node in suitable_nodes: + i, tau = node + if ((-self.tau_max <= tau <= 0) # or self.ignore_time_bounds) + and node not in causal_path): + + queue.append((node, causal_path)) + + if node in target_nodes: + if len(mediated_through) > 0 and len(set(causal_path).intersection(mediated_through)) == 0: + continue + else: + all_causal_paths[w][node].append(causal_path + [node]) + + return all_causal_paths + + +
[docs] def fit_total_effect(self, + dataframe, + estimator, + adjustment_set='optimal', + conditional_estimator=None, + data_transform=None, + mask_type=None, + ): + """Returns a fitted model for the total causal effect of X on Y + conditional on S. + + Parameters + ---------- + dataframe : data object + Tigramite dataframe object. It must have the attributes dataframe.values + yielding a numpy array of shape (observations T, variables N) and + optionally a mask of the same shape and a missing values flag. + estimator : sklearn model object + For example, sklearn.linear_model.LinearRegression() for a linear + regression model. + adjustment_set : str or list of tuples + If 'optimal' the Oset is used, if 'minimized_optimal' the minimized Oset, + and if 'colliders_minimized_optimal', the colliders-minimized Oset. + If a list of tuples is passed, this set is used. + conditional_estimator : sklearn model object, optional (default: None) + Used to fit conditional causal effects in nested regression. + If None, the same model as for estimator is used. + data_transform : sklearn preprocessing object, optional (default: None) + Used to transform data prior to fitting. For example, + sklearn.preprocessing.StandardScaler for simple standardization. The + fitted parameters are stored. + mask_type : {None, 'y','x','z','xy','xz','yz','xyz'} + Masking mode: Indicators for which variables in the dependence + measure I(X; Y | Z) the samples should be masked. If None, the mask + is not used. Explained in tutorial on masking and missing values. + """ + + self.dataframe = dataframe + self.conditional_estimator = conditional_estimator + + if adjustment_set == 'optimal': + # Check optimality and use either optimal or colliders_only set + adjustment_set = self.get_optimal_set() + elif adjustment_set == 'colliders_minimized_optimal': + adjustment_set = self.get_optimal_set(minimize='colliders_only') + elif adjustment_set == 'minimized_optimal': + adjustment_set = self.get_optimal_set(minimize=True) + else: + if self._check_validity(adjustment_set) is False: + raise ValueError("Chosen adjustment_set is not valid.") + + self.adjustment_set = adjustment_set + + # Fit model of Y on X and Z (and conditions) + # Build the model + self.model = Models( + dataframe=dataframe, + model=estimator, + conditional_model=conditional_estimator, + data_transform=data_transform, + mask_type=mask_type, + verbosity=self.verbosity) + + self.model.get_general_fitted_model( + Y=self.listY, X=self.listX, Z=list(self.adjustment_set), + conditions=self.listS, + tau_max=self.tau_max, + cut_off='max_lag_or_tau_max', + return_data=False) + + return self
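+    # Minimal usage sketch, assuming a tigramite DataFrame `dataframe` and
+    # scikit-learn (mirrors the __main__ example below):
+    #
+    #     from sklearn.linear_model import LinearRegression
+    #     causal_effects.fit_total_effect(
+    #         dataframe=dataframe,
+    #         estimator=LinearRegression(),
+    #         adjustment_set='optimal')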
+ +
[docs] def predict_total_effect(self, + intervention_data, + conditions_data=None, + pred_params=None, + ): + """Predict effect of intervention with fitted model. + + Uses the model.predict() function of the sklearn model. + + Parameters + ---------- + intervention_data : numpy array + Numpy array of shape (time, len(X)) that contains the do(X) values. + conditions_data : data object, optional + Numpy array of shape (time, len(S)) that contains the S=s values. + pred_params : dict, optional + Optional parameters passed on to sklearn prediction function. + + Returns + ------- + Results from prediction: an array of shape (time, len(Y)). + """ + if intervention_data.shape[1] != len(self.X): + raise ValueError("intervention_data.shape[1] must be len(X).") + + if conditions_data is not None: + if conditions_data.shape[1] != len(self.S): + raise ValueError("conditions_data.shape[1] must be len(S).") + if conditions_data.shape[0] != intervention_data.shape[0]: + raise ValueError("conditions_data.shape[0] must match intervention_data.shape[0].") + + effect = self.model.get_general_prediction( + intervention_data=intervention_data, + conditions_data=conditions_data, + pred_params=pred_params) + + return effect
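+    # Minimal usage sketch: after fit_total_effect(), compare predictions for
+    # two intervention values of X (conditions_data of shape (time, len(S))
+    # must additionally be passed if conditions S were specified):
+    #
+    #     import numpy as np
+    #     y1 = causal_effects.predict_total_effect(
+    #         intervention_data=np.ones((1, len(causal_effects.X))))
+    #     y0 = causal_effects.predict_total_effect(
+    #         intervention_data=np.zeros((1, len(causal_effects.X))))
+    #     total_effect = y1 - y0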
+ +
[docs] def fit_wright_effect(self, + dataframe, + mediation=None, + method='parents', + links_coeffs=None, + data_transform=None, + mask_type=None, + ): + """Returns a fitted model for the total or mediated causal effect of X on Y + through mediator variables. + + Parameters + ---------- + dataframe : data object + Tigramite dataframe object. It must have the attributes dataframe.values + yielding a numpy array of shape (observations T, variables N) and + optionally a mask of the same shape and a missing values flag. + mediation : None, 'direct', or list of tuples + If None, total effect is estimated, if 'direct' then only the direct effect is estimated, + else only those causal paths are considerd that pass at least through one of these mediator nodes. + method : {'parents', 'links_coeffs', 'optimal'} + Method to use for estimating Wright's path coefficients. If 'optimal', + the Oset is used, if 'links_coeffs', the coefficients in links_coeffs are used, + if 'parents', the parents are used (only valid for DAGs). + links_coeffs : dict + Only used if method = 'links_coeffs'. + Dictionary of format: {0:[((i, -tau), coeff),...], 1:[...], + ...} for all variables where i must be in [0..N-1] and tau >= 0 with + number of variables N. coeff must be a float. + data_transform : sklearn preprocessing object, optional (default: None) + Used to transform data prior to fitting. For example, + sklearn.preprocessing.StandardScaler for simple standardization. The + fitted parameters are stored. + mask_type : {None, 'y','x','z','xy','xz','yz','xyz'} + Masking mode: Indicators for which variables in the dependence + measure I(X; Y | Z) the samples should be masked. If None, the mask + is not used. Explained in tutorial on masking and missing values. + """ + + import sklearn.linear_model + + self.dataframe = dataframe + estimator = sklearn.linear_model.LinearRegression() + + # Fit model of Y on X and Z (and conditions) + # Build the model + self.model = Models( + dataframe=dataframe, + model=estimator, + data_transform=data_transform, + mask_type=mask_type, + verbosity=self.verbosity) + + mediators = self.get_mediators(start=self.X, end=self.Y) + + if mediation == 'direct': + causal_paths = {} + for w in self.X: + causal_paths[w] = {} + for z in self.Y: + if w in self._get_parents(z): + causal_paths[w][z] = [[w, z]] + else: + causal_paths[w][z] = [] + else: + causal_paths = self._get_causal_paths(source_nodes=self.X, + target_nodes=self.Y, mediators=mediators, + mediated_through=mediation, proper_paths=True) + + if method == 'links_coeffs': + coeffs = {} + max_lag = 0 + for medy in [med for med in mediators] + [y for y in self.listY]: + coeffs[medy] = {} + for ipar, par_coeff in enumerate(links_coeffs[medy[0]]): + par, coeff, _ = par_coeff + max_lag = max(abs(par[1]), max_lag) + coeffs[medy][par] = coeff #self.fit_results[j][(j, 0)]['model'].coef_[ipar] + + self.model.tau_max = max_lag + + elif method == 'optimal': + # all_parents = {} + coeffs = {} + for medy in [med for med in mediators] + [y for y in self.listY]: + coeffs[medy] = {} + mediator_parents = self._get_all_parents([medy]).intersection(mediators.union(self.X)) - set([medy]) + all_parents = self._get_all_parents([medy]) - set([medy]) + for par in mediator_parents: + Sprime = set(all_parents) - set([par, medy]) + causal_effects = CausalEffects(graph=self.graph, + X=[par], Y=[medy], S=Sprime, + graph_type=self.graph_type, + check_SM_overlap=False, + ) + oset = causal_effects.get_optimal_set() + if oset is False: + raise ValueError("Not 
identifiable via Wright's method.") + fit_res = self.model.get_general_fitted_model( + Y=[medy], X=[par], Z=oset, + tau_max=self.tau_max, + cut_off='max_lag_or_tau_max', + return_data=False) + coeffs[medy][par] = fit_res[medy]['model'].coef_[0] + + elif method == 'parents': + if 'dag' not in self.graph_type: + raise ValueError("method == 'parents' only possible for DAGs") + + coeffs = {} + for medy in [med for med in mediators] + [y for y in self.listY]: + coeffs[medy] = {} + # mediator_parents = self._get_all_parents([medy]).intersection(mediators.union(self.X)) - set([medy]) + all_parents = self._get_all_parents([medy]) - set([medy]) + # print(j, all_parents[j]) + # if len(all_parents[j]) > 0: + fit_res = self.model.get_general_fitted_model( + Y=[medy], X=list(all_parents), Z=[], + conditions=None, + tau_max=self.tau_max, + cut_off='max_lag_or_tau_max', + return_data=False) + + for ipar, par in enumerate(all_parents): + coeffs[medy][par] = fit_res[medy]['model'].coef_[ipar] + + else: + raise ValueError("method must be 'optimal', 'links_coeffs', or 'parents'.") + + # Effect is sum over products over all path coefficients + # from x in X to y in Y + effect = {} + for (x, y) in itertools.product(self.listX, self.listY): + effect[(x, y)] = 0. + for causal_path in causal_paths[x][y]: + effect_here = 1. + for index, node in enumerate(causal_path[:-1]): + i, taui = node + j, tauj = causal_path[index + 1] + # tau_ij = abs(tauj - taui) + effect_here *= coeffs[(j, tauj)][(i, taui)] + + effect[(x, y)] += effect_here + + + # Modify and overwrite variables in self.model + self.model.Y = self.listY + self.model.X = self.listX + self.model.Z = [] + self.model.conditions = [] + self.model.cut_off = 'max_lag_or_tau_max' + + class dummy_fit_class(): + def __init__(self, y_here, listX_here, effect_here): + dim = len(listX_here) + self.coeff_array = np.array([effect_here[(x, y_here)] for x in listX_here]).reshape(dim, 1) + def predict(self, X): + return np.dot(X, self.coeff_array).squeeze() + + fit_results = {} + for y in self.listY: + fit_results[y] = {} + fit_results[y]['model'] = dummy_fit_class(y, self.listX, effect) + fit_results[y]['data_transform'] = deepcopy(data_transform) + + # self.effect = effect + self.model.fit_results = fit_results + return self
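For `method='links_coeffs'`, the code unpacks three entries per parent, so the dictionary follows the toy-model link format `((parent, -lag), coefficient, function)` rather than the two-entry format stated in the docstring; a hedged, illustrative sketch of that format:

    def lin_f(x): return x

    # Illustrative only: child variable -> list of ((parent, -lag), coefficient, function)
    links_coeffs = {
        0: [],
        1: [((0, -1), 0.5, lin_f)],
        2: [((1, 0), 0.3, lin_f), ((0, -2), 0.2, lin_f)],
    }
    # causal_effects.fit_wright_effect(dataframe=dataframe,
    #                                  method='links_coeffs',
    #                                  links_coeffs=links_coeffs)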
+ + +
[docs] def predict_wright_effect(self, + intervention_data=None, + pred_params=None, + ): + """Predict linear effect of intervention with fitted Wright-model. + + Parameters + ---------- + intervention_data : numpy array + Numpy array of shape (time, len(X)) that contains the do(X) values. + pred_params : dict, optional + Optional parameters passed on to sklearn prediction function. + + Returns + ------- + Results from prediction: an array of shape (time, len(Y)). + """ + if intervention_data.shape[1] != len(self.X): + raise ValueError("intervention_data.shape[1] must be len(X).") + + effect = self.model.get_general_prediction( + intervention_data=intervention_data, + conditions_data=None, + pred_params=pred_params) + + return effect
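A hedged end-to-end sketch of the Wright workflow, assuming a `CausalEffects` instance constructed for a DAG with node lists `X` and `Y`, a tigramite `dataframe`, and predictions returned as arrays of shape (time, len(Y)) as documented above; all names are illustrative:

    import numpy as np

    causal_effects.fit_wright_effect(dataframe=dataframe, method='parents')

    T = dataframe.T
    y_do1 = causal_effects.predict_wright_effect(
        intervention_data=np.ones((T, len(X))))
    y_do0 = causal_effects.predict_wright_effect(
        intervention_data=np.zeros((T, len(X))))
    # Approximate linear effect of do(X=1) versus do(X=0)
    print((y_do1 - y_do0).mean())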
+ + +if __name__ == '__main__': + + import sys + import tigramite.data_processing as pp + import tigramite.toymodels.structural_causal_processes as toys + import tigramite.plotting as tp + from tigramite.independence_tests import OracleCI + from tigramite.data_processing import DataFrame + + import sklearn + from sklearn.linear_model import LinearRegression + from sklearn.neighbors import KNeighborsRegressor + from sklearn.neural_network import MLPRegressor + from sklearn.ensemble import RandomForestRegressor + + graph = np.array([[['', '-->', ''], + ['', '', ''], + ['', '', '']], + [['', '-->', ''], + ['', '-->', ''], + ['-->', '', '-->']], + [['', '', ''], + ['<--', '', ''], + ['', '-->', '']]], dtype='<U3') + + X = [(1,-2)] + Y = [(2,0)] + causal_effects = CausalEffects(graph, graph_type='stationary_dag', X=X, Y=Y, S=None, + hidden_variables=None, + verbosity=1) + var_names = ['$X^0$', '$X^1$', '$X^2$'] + tp.plot_time_series_graph(graph = causal_effects.graph, + var_names=var_names, + save_name='Example.pdf', + figsize = (8, 4), + ); + sys.exit(0) + + T = 10000 + np.random.seed(5) + Zdata = np.random.randn(T) + Sdata = Zdata + np.random.randn(T) #np.random.choice(a=[-1., 1.], size=T) + Xdata = np.random.randn(T) + Sdata*Zdata + Ydata = 0.7*Sdata*Xdata + Zdata + np.random.randn(T) + data = np.vstack((Xdata, Ydata, Sdata, Zdata)).T + dataframe = pp.DataFrame(data) + + graph = np.array([['', '-->', '<--', '<--'], + ['<--', '', '<--', '<--'], + ['-->', '-->', '', '<--'], + ['-->', '-->', '-->', '']], dtype='<U3') + + X = [(0,0)] + Y = [(1,0)] + S = [(2,0)] + causal_effects = CausalEffects(graph, graph_type='admg', + X=X, Y=Y, S=S, hidden_variables=None, + verbosity=0) + print(causal_effects.get_optimal_set()) + # Fit causal effect model from observational data + causal_effects.fit_total_effect( + dataframe=dataframe, + estimator=MLPRegressor(max_iter=200), #MLPRegressor(max_iter=200), + adjustment_set='optimal', + conditional_estimator=LinearRegression(), + data_transform=None, + mask_type=None, + ) + + # Copy original observational data + # intervention_data = np.ones((1, 1)) + # conditions_data = np.ones((1, 1)) + + # Set X to intervened values given S=-1, 1 + # S=-1 + conditions_data = np.linspace(-1, 1, 10).reshape(10, 1) + + intervention_data = 1.*np.ones((10, 1)) + y1 = causal_effects.predict_total_effect( + intervention_data=intervention_data, + conditions_data=conditions_data, + ) + + intervention_data = 0.*np.ones((10, 1)) + y2 = causal_effects.predict_total_effect( + intervention_data=intervention_data, + conditions_data=conditions_data, + ) + # for y in Y: + beta = (y1 - y2) + print(beta) + # print("Causal effect for S = % .2f is %.2f" %(cond_value,)) + + sys.exit() + + + def lin_f(x): return x + conf_coeff = 0. 
+ coeff = .5 + links = { + 0: [], #((0, -1), auto_coeff, lin_f)], + 1: [((0, 0), coeff, lin_f), ((5, 0), conf_coeff, lin_f)], + 2: [((1, 0), coeff, lin_f), ((5, 0), conf_coeff, lin_f)], + 3: [((1, 0), coeff, lin_f), ((2, 0), coeff, lin_f), ((6, 0), conf_coeff, lin_f), ((8, 0), conf_coeff, lin_f)], + 4: [((5, 0), conf_coeff, lin_f), ((8, 0), conf_coeff, lin_f)], #, ((7, 0), coeff, lin_f),], + 5: [], + 6: [], + 7: [], #((0, 0), coeff, lin_f)], + 8: [], + } + T = 10000 + data, nonstat = toys.structural_causal_process(links, T=T, noises=None, seed=7) + dataframe = pp.DataFrame(data) + + graph = np.array([['', '-->', '', '', '', '', ''], + ['<--', '', '-->', '-->', '', '<--', ''], + ['', '<--', '', '-->', '', '<--', ''], + ['', '<--', '<--', '', '<--', '', '<--'], + ['', '', '', '-->', '', '<--', ''], + ['', '-->', '-->', '', '-->', '', ''], + ['', '', '', '-->', '', '', '']], dtype='<U3') + + X = [(0,0), (1,0)] + Y = [(3,0)] + causal_effects = CausalEffects(graph, graph_type='dag', X=X, Y=Y, S=None, hidden_variables=None, + verbosity=1) + # Just for plotting purposes + var_names = ['$X_1$', '$X_2$', '$M$', '$Y$', '$Z_1$', '$Z_2$', '$Z_3$'] + + opt = causal_effects.get_optimal_set() + print("Oset = ", [(var_names[v[0]], v[1]) for v in opt]) + special_nodes = {} + for node in causal_effects.X: + special_nodes[node] = 'red' + for node in causal_effects.Y: + special_nodes[node] = 'blue' + for node in opt: + special_nodes[node] = 'orange' + for node in causal_effects.M: + special_nodes[node] = 'lightblue' + + + tp.plot_graph(graph = causal_effects.graph, + var_names=var_names, + save_name='Example-new.pdf', + figsize = (6, 6), + special_nodes=special_nodes + ) + + causal_effects.fit_wright_effect(dataframe=dataframe, + mediation = 'direct', #[(2, 0)], + method='parents' + ) + + intervention_data = data.copy() + + # Set X to intervened values + intervention_data[:,[x[0] for x in X]] = 1. + y1 = causal_effects.predict_wright_effect( + intervention_data=pp.DataFrame(intervention_data), + ) + + intervention_data[:,[x[0] for x in X]] = 0. + y2 = causal_effects.predict_wright_effect( + intervention_data=pp.DataFrame(intervention_data), + ) + + for y in Y: + beta = (y1[y] - y2[y]).mean() + print("Causal effect = %.2f" %(beta)) + sys.exit(0) + + + + graph = np.array([[['', '-->', ''], + ['', '', ''], + ['', '', '']], + [['', '-->', ''], + ['', '-->', ''], + ['-->', '', '-->']], + [['', '', ''], + ['<--', '', ''], + ['', '-->', '']]], dtype='<U3') + + X = [(1,-2)] + Y = [(2,0)] + causal_effects = CausalEffects(graph, graph_type='stationary_dag', X=X, Y=Y, S=None, + hidden_variables=None, + verbosity=1) + var_names = ['$X^0$', '$X^1$', '$X^2$'] + opt = causal_effects.get_optimal_set() + print("Oset = ", [(var_names[v[0]], v[1]) for v in opt]) + special_nodes = {} + for node in causal_effects.X: + special_nodes[node] = 'red' + for node in causal_effects.Y: + special_nodes[node] = 'blue' + for node in opt: + special_nodes[node] = 'orange' + for node in causal_effects.M: + special_nodes[node] = 'lightblue' + + tp.plot_time_series_graph(graph = causal_effects.graph, + var_names=var_names, + save_name='Example-new.pdf', + figsize = (10, 4), + special_nodes=special_nodes, + ) + sys.exit(0) + + + # Example from NeurIPS 2021 paper Fig. 1A + coeff = .5 + conf_coeff = 2. + conf_coeff2 = 1. + def lin_f(x): return x + def nonlin_f(x): return (x + 5. 
* x ** 2 * np.exp(-x ** 2 / 20.)) + + # Non-time series example + # links = { + # 0: [((3, 0), conf_coeff, lin_f), ((6, 0), conf_coeff, lin_f)], + # 1: [((0, 0), coeff, lin_f), ((4, 0), conf_coeff, lin_f)], + # 2: [((0, 0), coeff, lin_f), ((1, 0), coeff, lin_f), ((4, 0), conf_coeff, lin_f), ((7, 0), conf_coeff, lin_f)], #, ((1, 0), coeff, lin_f)], + # 3: [((6, 0), conf_coeff, lin_f)], + # 4: [((3, 0), conf_coeff2, lin_f)], + # 5: [((4, 0), conf_coeff, lin_f), ((7, 0), conf_coeff, lin_f)], + # 6: [], + # 7: []} + + # Same example with time-structure + # auto_coeff = 0.3 + # links = { + # 0: [((0, -1), auto_coeff, lin_f), ((3, 0), conf_coeff, lin_f), ((6, 0), conf_coeff, lin_f)], + # 1: [((1, -1), auto_coeff, lin_f), ((0, -1), coeff, lin_f), ((4, 0), conf_coeff, lin_f)], + # 2: [((2, -1), auto_coeff, lin_f), ((0, -2), coeff, lin_f), ((1, -1), coeff, lin_f), ((4, -1), conf_coeff, lin_f), ((7, 0), conf_coeff, lin_f)], #, ((1, 0), coeff, lin_f)], + # 3: [((3, -1), auto_coeff, lin_f), ((6, 0), conf_coeff, lin_f)], + # 4: [((4, -1), auto_coeff, lin_f), ((3, -1), conf_coeff2, lin_f)], + # 5: [((5, -1), auto_coeff, lin_f), ((4, -1), conf_coeff, lin_f), ((7, 0), conf_coeff, lin_f)], #, ((8, -1), conf_coeff, lin_f), ((8, 0), conf_coeff, lin_f)], + # 6: [], + # 7: [], + # 8: []} + + # DAG version of Non-time series example + # links = { + # 0: [((3, 0), conf_coeff, lin_f)], + # 1: [((0, 0), coeff, lin_f), ((4, 0), conf_coeff, lin_f)], + # 2: [((0, 0), coeff, lin_f), ((1, 0), coeff, lin_f), ((4, 0), conf_coeff, lin_f)], #, ((1, 0), coeff, lin_f)], + # 3: [], + # 4: [((3, 0), conf_coeff2, lin_f)], + # 5: [((4, 0), conf_coeff, lin_f)],} + + # observed_vars = [0, 1, 2, 3, 4, 5] + # var_names = ['X', 'M', 'Y', 'Z1', 'Z2', 'Z3'] + # X = [(0, 0)] #, (0, -2)] + # Y = [(2, 0)] + # conditions = [] # called 'S' in paper + + # DAG version of time series example + # auto_coeff = 0.3 + # links = { + # 0: [((0, -1), auto_coeff, lin_f),((3, 0), conf_coeff, lin_f)], + # 1: [((1, -1), auto_coeff, lin_f),((0, -1), coeff, lin_f), ((4, 0), conf_coeff, lin_f)], + # 2: [((2, -1), auto_coeff, lin_f),((0, -2), coeff, lin_f), ((1, -1), coeff, lin_f), ((4, -1), conf_coeff, lin_f), ((5, 0), coeff, lin_f)], + # 3: [((3, -1), auto_coeff, lin_f),], + # 4: [((4, -1), auto_coeff, lin_f),((3, -1), conf_coeff2, lin_f)], + # 5: [((5, -1), auto_coeff, lin_f),((4, -1), conf_coeff, lin_f)],} + + # observed_vars = [0, 1, 2, 3, 4, 5] + # var_names = ['X', 'M', 'Y', 'Z1', 'Z2', 'Z3'] + # X = [(0, -1), (0, -2), (0, -3)] + # Y = [(2, 0), (2, -1)] + # conditions = [] # called 'S' in paper + + ### TESTING + auto_coeff = 0.8 + coeff = 1. 
+ links = { + 0: [], #((0, -1), auto_coeff, lin_f)], + 1: [((0, 0), coeff, lin_f), ((5, 0), coeff, lin_f)], + 2: [((1, 0), coeff, lin_f), ((5, 0), coeff, lin_f)], + 3: [((1, 0), coeff, lin_f), ((2, 0), coeff, lin_f), ((6, 0), coeff, lin_f), ((8, 0), coeff, lin_f)], + 4: [((5, 0), coeff, lin_f), ((8, 0), coeff, lin_f)], #, ((7, 0), coeff, lin_f),], + 5: [], + 6: [], + 7: [], #((0, 0), coeff, lin_f)], + 8: [], + } + # links = { + # 0: [((0, -1), 1, lin_f), ((1, -1), 1, lin_f)], + # 1: [((1, -1), 2., lin_f)], + # 2: [((1, 0), 2., lin_f), ((1, -2), 2., lin_f), ((2, -1), 2., lin_f)], + # # 3: [((2, 0), 2., lin_f), ((1, 0), 2., lin_f)], + # } + + # observed_vars = [1, 2] + var_names = ['$X_1$', '$X_2$', '$M$', '$Y$', '$Z_1$', '$Z_2$', '$Z_3$', '$Z_4$', '$L$'] + X = [(0, 0), (1, 0)] #, (0, -2), (0, -3)] + Y = [(3, 0)] #, (1, -1)] + conditions = [] # [(4,0)] # called 'S' in paper + hidden_variables = [(8,0)] + # [ + # (0, 0), (0, -1), + # (1, -3), + # (3, -3), + # (3, -1), (3, 0), + # ] + tau_max_dag = 0 + tau_max_admg = 0 + graph_type = 'dag' + + int_value1 = 0. + int_value2 = 2. + + # ### TESTING DUNCAN + # coeff = 0.3 + # links = { + # 0: [((0, -1), coeff, lin_f), ((1, -1), coeff, lin_f), ((4, 0), coeff, lin_f)], + # 1: [((2, 0), coeff, lin_f), ((3, 0), coeff, lin_f), ((0, 0), coeff, lin_f)], + # 2: [((2, -1), coeff, lin_f), ((0, 0), coeff, lin_f), ((3, 0), coeff, lin_f)], + # 3: [((4, 0), coeff, lin_f), ((1, -1), coeff, lin_f), ((2, -1), coeff, lin_f)], + # 4:[((4, -1), coeff, lin_f)] + # } + + # observed_vars = [0, 1, 3, 4] + # var_names = ['N', 'P', 'T', 'M'] + # X = [(0, -2)] #, (0, -2), (0, -3)] + # Y = [(2, 0)] #, (1, -1)] + # conditions = [(3, 0)] # called 'S' in paper + ##### + + # ## Testing conditional effect estimation + # var_names = ['X', 'Y', 'S', 'Z'] + # X = [(0, 0)] #, (0, -2), (0, -3)] + # Y = [(1, 0)] #, (1, -1)] + # conditions = [(2, 0)] # called 'S' in paper + # links = { + # 0: [((3, 0), coeff, lin_f)], + # 1: [((0, 0), coeff, lin_f), ((3, 0), coeff, lin_f), ((2, 0), coeff, lin_f)], + # 2: [], + # 3: [((2, 0), coeff, lin_f)], + # } + # np.random.seed(41) + # data = np.random.randn(100000, 4) + # S_data = np.random.randint(-1, 2, size=100000) + + # # print(S_data) + # # data[:,1] += 5*data[:, 0] + # data[S_data==-1, 3] += -20. + # data[S_data==1, 3] += 20. 
+ + # data[S_data==-1, 0] += + 15*data[S_data==-1, 3] + # data[S_data==0, 0] += - 5*data[S_data==0, 3] + # data[S_data==1, 0] += - 15*data[S_data==1, 3] + + # data[S_data==-1, 1] += -10*data[S_data==-1, 0] - 10*data[S_data==-1, 3] + # data[S_data==0, 1] += + 5*data[S_data==0, 3] + # data[S_data==1, 1] += 10*data[S_data==1, 0] + 20*data[S_data==1, 3] + # data[:,2] = S_data + + # # data=data[S_data==0] + cond_value = 0 + # dataframe = pp.DataFrame(data) + + + # if tau_max is None, graph.shape[2]-1 will be used + # tau_max = 2 # 4 for time series version + + oracle = OracleCI(links=links, + observed_vars=list(range(len(links))), + tau_max=tau_max_dag) + graph = oracle.graph + + # CHANGE: assume non-timeseries graph + graph = graph.squeeze() + # assert graph.ndim == 2 + # tau_max = graph.shape[2] - 1 + print(repr(graph)) + + # T = 10000 + # data, nonstat = toys.structural_causal_process(links, T=T, noises=None, seed=7) + # dataframe = pp.DataFrame(data) + + # Initialize class + causal_effects = CausalEffects(graph=graph, X=X, Y=Y, + S=conditions, + graph_type=graph_type, + # tau_max = tau_max_admg, + hidden_variables=hidden_variables, + verbosity=1) + + print(causal_effects.check_XYS_paths()) + # graph_plot = np.zeros((graph.shape[0], graph.shape[1], 5), dtype='<U3') + # graph_plot[:,:,:tau_max+1] = graph[:,:,:] + + # aux = causal_effects.graph + # aux[1, 3, 0, 0] = '<->' + # aux[3, 1, 0, 0] = '<->' + # aux[2, 3, 0, 0] = '<->' + # aux[3, 2, 0, 0] = '<->' + # causal_effects.graph = aux + print(repr(causal_effects.graph.squeeze())) + # tp.plot_time_series_graph(graph = causal_effects.graph, var_names=var_names, + # save_name='Example-new.pdf', + # figsize = (12, 8), + # ) + + opt = causal_effects.get_optimal_set() + print("\nOset = ", opt) + # print([(var_names[v[0]], v[1]) for v in opt]) + optimality = causal_effects.check_optimality() + print("(Graph, X, Y, S) fulfills optimality: ", optimality) + + special_nodes = {} + for node in causal_effects.X: + special_nodes[node] = 'red' + for node in causal_effects.Y: + special_nodes[node] = 'blue' + for node in opt: + special_nodes[node] = 'orange' + for node in causal_effects.get_mediators(start=X, end=Y): + special_nodes[node] = 'lightblue' + for node in causal_effects.hidden_variables: + # print(node) + special_nodes[node] = 'lightgrey' + + plot_graph = causal_effects.graph.squeeze() + + # Remove hidden_variables + plot_graph = np.delete(plot_graph, [h[0] for h in hidden_variables + [(7,0)]], axis=0) + plot_graph = np.delete(plot_graph, [h[0] for h in hidden_variables + [(7,0)]], axis=1) + + print(repr(plot_graph.squeeze())) + + + tp.plot_graph(graph = plot_graph, + var_names=var_names, + save_name='Example-new.pdf', + figsize = (12, 8), + # special_nodes=special_nodes, + # cmap_nodes = None, + # cmap_edges=None, + # show_colorbar=False, + ) + # plot_graph = np.expand_dims(causal_effects.graph.squeeze(), axis = 2) + # tp.plot_time_series_graph(graph = plot_graph, + # var_names=var_names, + # save_name='Example-new.pdf', + # figsize = (12, 8), + # special_nodes=special_nodes, + # ) + + + + # causal_effects._check_path(graph=causal_effects.graph, + # start=X, end=Y, + # conditions=[(0, -2), (2, -1), (1, -2), (1, -3), ]) + # tp.plot_time_series_graph(graph = graph, var_names=var_names, + # save_name='Example-Fig1A-TSG.pdf', + # figsize = (8, 8)) + + # aux_graph = causal_effects._get_latent_projection_graph() + # # print(aux_graph) + + # graph_plot = np.zeros((graph.shape[0], graph.shape[1], tau_max+1), dtype='<U3') + # graph_plot[:,:,:] = 
aux_graph[:,:,:,0] + # tp.plot_time_series_graph(graph = graph_plot, + # var_names=var_names, + # save_name='Example-Fig1A-auxTSG.pdf', + # figsize = (8, 8), + # aux_graph=aux_graph) + + + + if causal_effects._get_adjust_set() is False: + print("Not identifiable!") + + sys.exit(0) + + optimality = causal_effects.check_optimality() + print("(Graph, X, Y, S) fulfills optimality: ", optimality) + + # Adjust-set + adjust = causal_effects._get_adjust_set() + print("\nAdjust / Ancs set") + print([(var_names[v[0]], v[1]) for v in adjust]) + + # # Minimized Adjust-set + # adjust_min = causal_effects._get_adjust_set(minimize=True) + # print("\nMin Ancs set") + # print([(var_names[v[0]], v[1]) for v in adjust_min]) + + # # ParX-minimized Ancs-set + # adjust_pxmin = causal_effects._get_adjust_set(minimize='keep_parentsYM') + # print("\nMinParX Ancs set") + # print([(var_names[v[0]], v[1]) for v in adjust_pxmin]) + + # Optimal adjustment set + opt = causal_effects.get_optimal_set() + print("\nOset") + print([(var_names[v[0]], v[1]) for v in opt]) + + # # Minimized adjustment set + # opt_min = causal_effects.get_optimal_set(minimize=True) + # print("\nMin Oset") + # print([(var_names[v[0]], v[1]) for v in opt_min]) + + # opt_cmin = causal_effects.get_optimal_set(minimize='colliders_only') + # print("\nMinColl Oset") + # print([(var_names[v[0]], v[1]) for v in opt_cmin]) + + + # Plot graph + # if tau_max is not None: + # graph_plot = np.zeros((len(observed_vars), + # len(observed_vars), tau_max+1), dtype='<U3') + # graph_plot[:,:, :graph.shape[2]] = graph + # graph_plot[:,:, graph.shape[2]:] = "" + # # print(graph_plot.shape) + # # print(graph.shape) + # else: + # graph_plot = graph + + special_nodes = {} + for node in X: + special_nodes[node] = 'red' + for node in Y: + special_nodes[node] = 'blue' + for node in opt: + special_nodes[node] = 'orange' + for node in causal_effects.get_mediators(start=X, end=Y): + special_nodes[node] = 'lightblue' + for node in causal_effects.hidden_variables: + # print(node) + special_nodes[node] = 'lightgrey' + + + # tp.plot_graph(graph = causal_effects.graph, var_names=var_names, + # save_name='Example-Fig1A.pdf', + # figsize = (15, 15), node_size=0.2, + # special_nodes=special_nodes) + tp.plot_time_series_graph(graph = causal_effects.graph, var_names=var_names, + save_name='Example-Fig1A-TSG.pdf', + figsize = (12, 8), + special_nodes=special_nodes) + + + # sys.exit(0) + # + # estimator = LinearRegression() + # estimator = KNeighborsRegressor(n_neighbors=4) + estimator = MLPRegressor(max_iter=200) + # estimator = RandomForestRegressor() + + conditional_estimator = LinearRegression() + + causal_effects.fit_total_effect( + dataframe=dataframe, + estimator=estimator, + conditional_estimator=conditional_estimator, + adjustment_set='optimal', + data_transform=None, + mask_type=None, + ) + + # # # Causal effect in observational data + intervention_data1 = data.copy() + intervention_data1[:, X[0][0]] += int_value1 + intervention_data1 = pp.DataFrame(intervention_data1) + + # print(intervention_data1.values[:,X[0][0]]) + + # # Causal effect for interventional data + # # with + 1 added + intervention_data2 = data.copy() + intervention_data2[:, X[0][0]] += int_value2 + intervention_data2 = pp.DataFrame(intervention_data2) + + # print(intervention_data2.values[:,X[0][0]]) + + + ce_int1 = causal_effects.predict_total_effect( + intervention_data=intervention_data1, + conditions_data=None, + ) + + ce_int2 = causal_effects.predict_total_effect( + intervention_data=intervention_data2, + 
conditions_data=None, + ) + + # Ground truth: + data, nonstat = toys.structural_causal_process(links=links, T=T, noises=None, + intervention={X[0][0]: intervention_data1.values[:, X[0][0]]}, + intervention_type='hard', + seed=7) + true_ce_int1 = data[:,Y[0][0]].mean() + # print(data[:,X[0][0]]) + data, nonstat = toys.structural_causal_process(links=links, T=T, noises=None, + intervention={X[0][0]: intervention_data2.values[:, X[0][0]]}, + intervention_type='hard', + seed=7) + true_ce_int2 = data[:,Y[0][0]].mean() + # print(data[:,X[0][0]]) + + # causal_effects.fit_wright_effect(dataframe=dataframe, + # links_coeffs=links, + # method = 'optimal', + # mediation=[], + # # data_transform=sklearn.preprocessing.StandardScaler() + # ) + # ce_obs = causal_effects.predict_wright_effect(intervention_data=None) + # ce_int = causal_effects.predict_wright_effect(intervention_data=intervention_data) + + + ## Expected change corresponds to linear regression coefficient + ## for linear models + # print('\n') + # for y in Y: + # beta = (ce_int[y] - ce_obs[y]).mean() + # print("Causal effect of %s on %s = %.2f" %(X, y, beta)) + + + # # Conditional causal effect in observational data + # conditions_data = data.copy() + # conditions_data[:, [cond[0] for cond in conditions]] = 0 + # conditions_data = pp.DataFrame(conditions_data) + + # ce_obs = causal_effects.predict_total_effect( + # intervention_data=None, + # conditions_data=conditions_data, + # ) + + # ce_int = causal_effects.predict_total_effect( + # intervention_data=intervention_data, + # conditions_data=conditions_data, + # ) + + ## Expected change corresponds to linear regression coefficient + ## for linear models + print('\n') + for y in Y: + print("Estimated effect for do(%s = %.2f) given (%s = %.2f) gives %s = %.2f (true = %.3f)" %(X, int_value1, conditions, cond_value, y, ce_int1[y].mean(), true_ce_int1)) + print("Estimated effect for do(%s = %.2f) given (%s = %.2f) gives %s = %.2f (true = %.3f)" %(X, int_value2, conditions, cond_value, y, ce_int2[y].mean(), true_ce_int2)) + + # print(str(estimator)) + if 'Linear' in str(estimator) and (int_value2 - int_value1) == 1.: + beta = (ce_int2[y] - ce_int1[y]).mean() + print("Linear causal effect of %s on %s is %.2f" %(X, y, beta)) + + + + + +
+ +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/_build/html/_modules/tigramite/data_processing.html b/docs/_build/html/_modules/tigramite/data_processing.html new file mode 100644 index 00000000..91fa9788 --- /dev/null +++ b/docs/_build/html/_modules/tigramite/data_processing.html @@ -0,0 +1,807 @@ + + + + + + + tigramite.data_processing — Tigramite 5.0 documentation + + + + + + + + + + + + + +
+
+
+
+ +

Source code for tigramite.data_processing

+"""Tigramite data processing functions."""
+
+# Author: Jakob Runge <jakob@jakob-runge.com>
+#
+# License: GNU General Public License v3.0
+from __future__ import print_function
+from collections import defaultdict, OrderedDict
+import sys
+import warnings
+import copy
+import math
+import numpy as np
+import scipy.sparse
+import scipy.sparse.linalg
+from numba import jit
+
+
[docs]class DataFrame(): + """Data object containing time series array and optional mask. + + Alternatively, a panda dataframe can be used. + + Parameters + ---------- + data : array-like + Numpy array of shape (observations T, variables N) + mask : array-like, optional (default: None) + Optional mask array, must be of same shape as data + + Attributes + ---------- + data : array-like + Numpy array of shape (observations T, variables N) + mask : array-like, optional (default: None) + Optional mask array, must be of same shape as data + missing_flag : number, optional (default: None) + Flag for missing values in dataframe. Dismisses all time slices of + samples where missing values occur in any variable and also flags + samples for all lags up to 2*tau_max. This avoids biases, see + section on masking in Supplement of [1]_. + var_names : list of strings, optional (default: range(N)) + Names of variables, must match the number of variables. If None is + passed, variables are enumerated as [0, 1, ...] + datatime : array-like, optional (default: None) + Timelabel array. If None, range(T) is used. + """ + def __init__(self, data, mask=None, missing_flag=None, var_names=None, + datatime=None): + + self.values = data + self.mask = mask + self.missing_flag = missing_flag + if self.missing_flag is not None: + self.values[self.values == self.missing_flag] = np.nan + T, N = data.shape + # Set the variable names + self.var_names = var_names + # Set the default variable names if none are set + if self.var_names is None: + self.var_names = {i: i for i in range(N)} + + # Set datatime + self.datatime = datatime + if self.datatime is None: + self.datatime = np.arange(T) + + # if type(self.values) != np.ndarray: + # raise TypeError("data is of type %s, " % type(self.values) + + # "must be np.ndarray") + if N > T: + warnings.warn("data.shape = %s," % str(self.values.shape) + + " is it of shape (observations, variables) ?") + # if np.isnan(data).sum() != 0: + # raise ValueError("NaNs in the data") + self._check_mask() + + self.T = T + self.N = N + + # If PCMCI.run_bootstrap_of is called, then the + # bootstrap random draw can be set here + self.bootstrap = None + + def _check_mask(self, mask=None, require_mask=False): + """Checks that the mask is: + * The same shape as the data + * Is an numpy ndarray (or subtype) + * Does not contain any NaN entrie + + Parameters + ---------- + require_mask : bool (default : False) + """ + # Check that there is a mask if required + _use_mask = mask + if _use_mask is None: + _use_mask = self.mask + if require_mask and _use_mask is None: + raise ValueError("Expected a mask, but got nothing!") + # If we have a mask, check it + if _use_mask is not None: + # Check the mask inherets from an ndarray + if not isinstance(_use_mask, np.ndarray): + raise TypeError("mask is of type %s, " % + type(_use_mask) + + "must be numpy.ndarray") + # Check if there is an nan-value in the mask + if np.isnan(np.sum(_use_mask)): + raise ValueError("NaNs in the data mask") + # Check the mask and the values have the same shape + if self.values.shape != _use_mask.shape: + raise ValueError("shape mismatch: dataframe.values.shape = %s" + % str(self.values.shape) + \ + " but mask.shape = %s, must be identical" + % str(_use_mask.shape)) + +
[docs] def construct_array(self, X, Y, Z, tau_max, + mask=None, + mask_type=None, + return_cleaned_xyz=False, + do_checks=True, + cut_off='2xtau_max', + verbosity=0): + """Constructs array from variables X, Y, Z from data. + + Data is of shape (T, N), where T is the time series length and N the + number of variables. + + Parameters + ---------- + X, Y, Z : list of tuples + For a dependence measure I(X;Y|Z), X, Y, Z can be multivariate of + the form [(var1, -lag), (var2, -lag), ...]. At least one varlag in Y + has to be at lag zero. + tau_max : int + Maximum time lag. This may be used to make sure that estimates for + different lags in X and Z all have the same sample size. + mask : array-like, optional (default: None) + Optional mask array, must be of same shape as data. If it is set, + then it overrides the self.mask assigned to the dataframe. If it is + None, then the self.mask is used, if it exists. + mask_type : {None, 'y','x','z','xy','xz','yz','xyz'} + Masking mode: Indicators for which variables in the dependence + measure I(X; Y | Z) the samples should be masked. If None, the mask + is not used. Explained in tutorial on masking and missing values. + return_cleaned_xyz : bool, optional (default: False) + Whether to return cleaned X,Y,Z, where possible duplicates are + removed. + do_checks : bool, optional (default: True) + Whether to perform sanity checks on input X,Y,Z + cut_off : {'2xtau_max', 'max_lag', 'max_lag_or_tau_max'} + How many samples to cutoff at the beginning. The default is + '2xtau_max', which guarantees that MCI tests are all conducted on + the same samples. For modeling, 'max_lag_or_tau_max' can be used, + which uses the maximum of tau_max and the conditions, which is + useful to compare multiple models on the same sample. Last, + 'max_lag' uses as much samples as possible. + verbosity : int, optional (default: 0) + Level of verbosity. + + Returns + ------- + array, xyz [,XYZ] : Tuple of data array of shape (dim, time) and xyz + identifier array of shape (dim,) identifying which row in array + corresponds to X, Y, and Z. For example:: X = [(0, -1)], Y = [(1, + 0)], Z = [(1, -1), (0, -2)] yields an array of shape (4, T) and + xyz is xyz = numpy.array([0,1,2,2]) If return_cleaned_xyz is + True, also outputs the cleaned XYZ lists. 
+ + """ + + # Get the length in time and the number of nodes + T, N = self.values.shape + + # Remove duplicates in X, Y, Z + X = list(OrderedDict.fromkeys(X)) + Y = list(OrderedDict.fromkeys(Y)) + Z = list(OrderedDict.fromkeys(Z)) + + # If a node in Z occurs already in X or Y, remove it from Z + Z = [node for node in Z if (node not in X) and (node not in Y)] + + # Check that all lags are non-positive and indices are in [0,N-1] + XYZ = X + Y + Z + dim = len(XYZ) + + # Ensure that XYZ makes sense + if do_checks: + self._check_nodes(Y, XYZ, N, dim) + + # Figure out what cut off we will be using + if cut_off == '2xtau_max': + max_lag = 2*tau_max + elif cut_off == 'max_lag': + max_lag = abs(np.array(XYZ)[:, 1].min()) + elif cut_off == 'max_lag_or_tau_max': + max_lag = max(abs(np.array(XYZ)[:, 1].min()), tau_max) + else: + raise ValueError("max_lag must be in {'2xtau_max', 'max_lag', 'max_lag_or_tau_max'}") + + # Setup XYZ identifier + index_code = {'x' : 0, + 'y' : 1, + 'z' : 2} + xyz = np.array([index_code[name] + for var, name in zip([X, Y, Z], ['x', 'y', 'z']) + for _ in var]) + + # Setup and fill array with lagged time series + time_length = T - max_lag + array = np.zeros((dim, time_length), dtype=self.values.dtype) + # Note, lags are negative here + for i, (var, lag) in enumerate(XYZ): + if self.bootstrap is None: + array[i, :] = self.values[max_lag + lag:T + lag, var] + else: + array[i, :] = self.values[self.bootstrap + lag, var] + + # Choose which indices to use + use_indices = np.ones(time_length, dtype='int') + + # Remove all values that have missing value flag, as well as the time + # slices that occur up to max_lag after + if self.missing_flag is not None: + missing_anywhere = np.any(np.isnan(self.values), axis=1) + for tau in range(max_lag+1): + if self.bootstrap is None: + use_indices[missing_anywhere[tau:T-max_lag+tau]] = 0 + else: + use_indices[missing_anywhere[self.bootstrap - max_lag + tau]] = 0 + + # Use the mask override if needed + _use_mask = mask + if _use_mask is None: + _use_mask = self.mask + else: + self._check_mask(mask=_use_mask) + + if _use_mask is not None: + # Remove samples with mask == 1 conditional on which mask_type is + # used Create an array selector that is the same shape as the output + # array + array_mask = np.zeros((dim, time_length), dtype='int32') + # Iterate over all nodes named in X, Y, or Z + for i, (var, lag) in enumerate(XYZ): + # Transform the mask into the output array shape, i.e. from data + # mask to array mask + if self.bootstrap is None: + array_mask[i, :] = (_use_mask[max_lag + lag: T + lag, var] == False) + else: + array_mask[i, :] = (_use_mask[self.bootstrap + lag, var] == False) + + # Iterate over defined mapping from letter index to number index, + # i.e. 
'x' -> 0, 'y' -> 1, 'z'-> 2 + for idx, cde in index_code.items(): + # Check if the letter index is in the mask type + if (mask_type is not None) and (idx in mask_type): + # If so, check if any of the data that correspond to the + # letter index is masked by taking the product along the + # node-data to return a time slice selection, where 0 means + # the time slice will not be used + slice_select = np.prod(array_mask[xyz == cde, :], axis=0) + use_indices *= slice_select + + if (self.missing_flag is not None) or (_use_mask is not None): + if use_indices.sum() == 0: + raise ValueError("No unmasked samples") + array = array[:, use_indices == 1] + + # Print information about the constructed array + if verbosity > 2: + self.print_array_info(array, X, Y, Z, self.missing_flag, mask_type) + + # Return the array and xyz and optionally (X, Y, Z) + if return_cleaned_xyz: + return array, xyz, (X, Y, Z) + + return array, xyz
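A short, illustrative sketch of `construct_array` using the X, Y, Z example from the docstring (random data, no mask or missing values):

    import numpy as np
    from tigramite.data_processing import DataFrame

    frame = DataFrame(np.random.randn(100, 3))
    X, Y, Z = [(0, -1)], [(1, 0)], [(1, -1), (0, -2)]
    array, xyz = frame.construct_array(X, Y, Z, tau_max=2, cut_off='2xtau_max')
    print(array.shape)  # (4, 96): dim = len(X + Y + Z) rows, T - 2*tau_max samples
    print(xyz)          # [0 1 2 2]: row identifiers for X, Y, Z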
+ + def _check_nodes(self, Y, XYZ, N, dim): + """ + Checks that: + * The requests XYZ nodes have the correct shape + * All lags are non-positive + * All indices are less than N + * One of the Y nodes has zero lag + + Parameters + ---------- + Y : list of tuples + Of the form [(var, -tau)], where var specifies the variable + index and tau the time lag. + XYZ : list of tuples + List of nodes chosen for current independence test + N : int + Total number of listed nodes + dim : int + Number of nodes excluding repeated nodes + """ + if np.array(XYZ).shape != (dim, 2): + raise ValueError("X, Y, Z must be lists of tuples in format" + " [(var, -lag),...], eg., [(2, -2), (1, 0), ...]") + if np.any(np.array(XYZ)[:, 1] > 0): + raise ValueError("nodes are %s, " % str(XYZ) + + "but all lags must be non-positive") + if (np.any(np.array(XYZ)[:, 0] >= N) + or np.any(np.array(XYZ)[:, 0] < 0)): + raise ValueError("var indices %s," % str(np.array(XYZ)[:, 0]) + + " but must be in [0, %d]" % (N - 1)) + # if np.all(np.array(Y)[:, 1] != 0): + # raise ValueError("Y-nodes are %s, " % str(Y) + + # "but one of the Y-nodes must have zero lag") + +
[docs] def print_array_info(self, array, X, Y, Z, missing_flag, mask_type): + """ + Print info about the constructed array + + Parameters + ---------- + array : Data array of shape (dim, T) + Data array. + X, Y, Z : list of tuples + For a dependence measure I(X;Y|Z), Y is of the form [(varY, 0)], + where var specifies the variable index. X typically is of the form + [(varX, -tau)] with tau denoting the time lag and Z can be + multivariate [(var1, -lag), (var2, -lag), ...] . + missing_flag : number, optional (default: None) + Flag for missing values. Dismisses all time slices of samples where + missing values occur in any variable and also flags samples for all + lags up to 2*tau_max. This avoids biases, see section on masking in + Supplement of [1]_. + mask_type : {'y','x','z','xy','xz','yz','xyz'} + Masking mode: Indicators for which variables in the dependence + measure I(X; Y | Z) the samples should be masked. If None, the mask + is not used. Explained in tutorial on masking and missing values. + """ + indt = " " * 12 + print(indt + "Constructed array of shape %s from"%str(array.shape) + + "\n" + indt + "X = %s" % str(X) + + "\n" + indt + "Y = %s" % str(Y) + + "\n" + indt + "Z = %s" % str(Z)) + if self.mask is not None and mask_type is not None: + print(indt+"with masked samples in %s removed" % mask_type) + if self.missing_flag is not None: + print(indt+"with missing values = %s removed" % self.missing_flag)
+ + + +
[docs]def lowhighpass_filter(data, cutperiod, pass_periods='low'): + """Butterworth low- or high pass filter. + + This function applies a linear filter twice, once forward and once + backwards. The combined filter has linear phase. + + Parameters + ---------- + data : array + Data array of shape (time, variables). + cutperiod : int + Period of cutoff. + pass_periods : str, optional (default: 'low') + Either 'low' or 'high' to act as a low- or high-pass filter + + Returns + ------- + data : array + Filtered data array. + """ + try: + from scipy.signal import butter, filtfilt + except: + print('Could not import scipy.signal for butterworth filtering!') + + fs = 1. + order = 3 + ws = 1. / cutperiod / (0.5 * fs) + b, a = butter(order, ws, pass_periods) + if np.ndim(data) == 1: + data = filtfilt(b, a, data) + else: + for i in range(data.shape[1]): + data[:, i] = filtfilt(b, a, data[:, i]) + + return data
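For illustration, a hedged sketch that removes variability with periods shorter than 12 time steps; note that for 2D input the filtering overwrites the columns of the passed array, hence the copy:

    import numpy as np

    data = np.random.randn(500, 3)
    data_lowpass = lowhighpass_filter(data.copy(), cutperiod=12, pass_periods='low')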
+ + +
[docs]def smooth(data, smooth_width, kernel='gaussian', + mask=None, residuals=False): + """Returns either smoothed time series or its residuals. + + the difference between the original and the smoothed time series + (=residuals) of a kernel smoothing with gaussian (smoothing kernel width = + twice the sigma!) or heaviside window, equivalent to a running mean. + + Assumes data of shape (T, N) or (T,) + :rtype: array + :returns: smoothed/residual data + + Parameters + ---------- + data : array + Data array of shape (time, variables). + smooth_width : float + Window width of smoothing, 2*sigma for a gaussian. + kernel : str, optional (default: 'gaussian') + Smoothing kernel, 'gaussian' or 'heaviside' for a running mean. + mask : bool array, optional (default: None) + Data mask where True labels masked samples. + residuals : bool, optional (default: False) + True if residuals should be returned instead of smoothed data. + + Returns + ------- + data : array-like + Smoothed/residual data. + """ + + print("%s %s smoothing with " % ({True: "Take residuals of a ", + False: ""}[residuals], kernel) + + "window width %.2f (2*sigma for a gaussian!)" % (smooth_width)) + + totaltime = len(data) + if kernel == 'gaussian': + window = np.exp(-(np.arange(totaltime).reshape((1, totaltime)) - + np.arange(totaltime).reshape((totaltime, 1)) + ) ** 2 / ((2. * smooth_width / 2.) ** 2)) + elif kernel == 'heaviside': + import scipy.linalg + wtmp = np.zeros(totaltime) + wtmp[:np.ceil(smooth_width / 2.)] = 1 + window = scipy.linalg.toeplitz(wtmp) + + if mask is None: + if np.ndim(data) == 1: + smoothed_data = (data * window).sum(axis=1) / window.sum(axis=1) + else: + smoothed_data = np.zeros(data.shape) + for i in range(data.shape[1]): + smoothed_data[:, i] = ( + data[:, i] * window).sum(axis=1) / window.sum(axis=1) + else: + if np.ndim(data) == 1: + smoothed_data = ((data * window * (mask==False)).sum(axis=1) / + (window * (mask==False)).sum(axis=1)) + else: + smoothed_data = np.zeros(data.shape) + for i in range(data.shape[1]): + smoothed_data[:, i] = (( + data[:, i] * window * (mask==False)[:, i]).sum(axis=1) / + (window * (mask==False)[:, i]).sum(axis=1)) + + if residuals: + return data - smoothed_data + else: + return smoothed_data
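For illustration, a hedged sketch of the residuals option, e.g. as a simple detrending/anomaly step (smoothing width chosen arbitrarily):

    import numpy as np

    data = np.random.randn(300, 2)
    anomalies = smooth(data.copy(), smooth_width=30., kernel='gaussian', residuals=True)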
+ + +
[docs]def weighted_avg_and_std(values, axis, weights): + """Returns the weighted average and standard deviation. + + Parameters + --------- + values : array + Data array of shape (time, variables). + axis : int + Axis to average/std about + weights : array + Weight array of shape (time, variables). + + Returns + ------- + (average, std) : tuple of arrays + Tuple of weighted average and standard deviation along axis. + """ + + values[np.isnan(values)] = 0. + average = np.ma.average(values, axis=axis, weights=weights) + + variance = np.sum(weights * (values - np.expand_dims(average, axis) + ) ** 2, axis=axis) / weights.sum(axis=axis) + + return (average, np.sqrt(variance))
+ + +
[docs]def time_bin_with_mask(data, time_bin_length, mask=None): + """Returns time-binned data where only non-masked values are averaged. + + Parameters + ---------- + data : array + Data array of shape (time, variables). + time_bin_length : int + Length of time bin. + mask : bool array, optional (default: None) + Data mask where True labels masked samples. + + Returns + ------- + (bindata, T) : tuple of array and int + Tuple of time-binned data array and new length of array. + """ + + T = len(data) + + time_bin_length = int(time_bin_length) + + if mask is None: + sample_selector = np.ones(data.shape) + else: + # Invert mask + sample_selector = (mask == False) + + if np.ndim(data) == 1.: + data.shape = (T, 1) + if mask is not None: + mask.shape = (T, 1) + + bindata = np.zeros( + (T // time_bin_length,) + data.shape[1:], dtype="float32") + for index, i in enumerate(range(0, T - time_bin_length + 1, + time_bin_length)): + # print weighted_avg_and_std(fulldata[i:i+time_bin_length], axis=0, + # weights=sample_selector[i:i+time_bin_length])[0] + bindata[index] = weighted_avg_and_std(data[i:i + time_bin_length], + axis=0, + weights=sample_selector[i:i + + time_bin_length])[0] + + T, grid_size = bindata.shape + + return (bindata.squeeze(), T)
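A small, illustrative sketch of binning without a mask:

    import numpy as np

    data = np.random.randn(90, 2)
    binned, T_new = time_bin_with_mask(data, time_bin_length=3)
    print(binned.shape, T_new)  # (30, 2) 30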
+ +@jit +def _get_patterns(array, array_mask, patt, patt_mask, weights, dim, step, fac, N, T): + v = np.zeros(dim, dtype='float') + + start = step * (dim - 1) + for n in range(0, N): + for t in range(start, T): + mask = 1 + ave = 0. + for k in range(0, dim): + tau = k * step + v[k] = array[t - tau, n] + ave += v[k] + mask *= array_mask[t - tau, n] + ave /= dim + var = 0. + for k in range(0, dim): + var += (v[k] - ave) ** 2 + var /= dim + weights[t - start, n] = var + if (v[0] < v[1]): + p = 1 + else: + p = 0 + for i in range(2, dim): + for j in range(0, i): + if (v[j] < v[i]): + p += fac[i] + patt[t - start, n] = p + patt_mask[t - start, n] = mask + + return patt, patt_mask, weights + +
[docs]def ordinal_patt_array(array, array_mask=None, dim=2, step=1, + weights=False, verbosity=0): + """Returns symbolified array of ordinal patterns. + + Each data vector (X_t, ..., X_t+(dim-1)*step) is converted to its rank + vector. E.g., (0.2, -.6, 1.2) --> (1,0,2) which is then assigned to a + unique integer (see Article). There are faculty(dim) possible rank vectors. + + Note that the symb_array is step*(dim-1) shorter than the original array! + + Reference: B. Pompe and J. Runge (2011). Momentary information transfer as + a coupling measure of time series. Phys. Rev. E, 83(5), 1-12. + doi:10.1103/PhysRevE.83.051122 + + Parameters + ---------- + array : array-like + Data array of shape (time, variables). + array_mask : bool array + Data mask where True labels masked samples. + dim : int, optional (default: 2) + Pattern dimension + step : int, optional (default: 1) + Delay of pattern embedding vector. + weights : bool, optional (default: False) + Whether to return array of variances of embedding vectors as weights. + verbosity : int, optional (default: 0) + Level of verbosity. + + Returns + ------- + patt, patt_mask [, patt_time] : tuple of arrays + Tuple of converted pattern array and new length + """ + from scipy.misc import factorial + + array = array.astype('float64') + + if array_mask is not None: + assert array_mask.dtype == 'int32' + else: + array_mask = np.zeros(array.shape, dtype='int32') + + + if np.ndim(array) == 1: + T = len(array) + array = array.reshape(T, 1) + array_mask = array_mask.reshape(T, 1) + + # Add noise to destroy ties... + array += (1E-6 * array.std(axis=0) + * np.random.rand(array.shape[0], array.shape[1]).astype('float64')) + + + patt_time = int(array.shape[0] - step * (dim - 1)) + T, N = array.shape + + if dim <= 1 or patt_time <= 0: + raise ValueError("Dim mist be > 1 and length of delay vector smaller " + "array length.") + + patt = np.zeros((patt_time, N), dtype='int32') + weights_array = np.zeros((patt_time, N), dtype='float64') + + patt_mask = np.zeros((patt_time, N), dtype='int32') + + # Precompute factorial for c-code... patterns of dimension + # larger than 10 are not supported + fac = factorial(np.arange(10)).astype('int32') + + # _get_patterns assumes mask=0 to be a masked value + array_mask = (array_mask == False).astype('int32') + + (patt, patt_mask, weights_array) = _get_patterns(array, array_mask, patt, patt_mask, weights_array, dim, step, fac, N, T) + + weights_array = np.asarray(weights_array) + patt = np.asarray(patt) + # Transform back to mask=1 implying a masked value + patt_mask = np.asarray(patt_mask) == False + + if weights: + return patt, patt_mask, patt_time, weights_array + else: + return patt, patt_mask, patt_time
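A small, illustrative sketch for two series and pattern dimension 3. Note that recent SciPy releases (including the scipy>=1.7.1 pinned in this patch) provide `factorial` in `scipy.special`, so the `scipy.misc` import above may need adjusting:

    import numpy as np

    array = np.random.randn(1000, 2)
    patt, patt_mask, patt_time = ordinal_patt_array(array, dim=3, step=1)
    print(patt.shape, patt_time)  # (998, 2) 998, since patt_time = T - step*(dim - 1)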
+ + +
[docs]def quantile_bin_array(data, bins=6): + """Returns symbolified array with equal-quantile binning. + + Parameters + ---------- + data : array + Data array of shape (time, variables). + bins : int, optional (default: 6) + Number of bins. + + Returns + ------- + symb_array : array + Converted data of integer type. + """ + T, N = data.shape + + # get the bin quantile steps + bin_edge = int(np.ceil(T / float(bins))) + + symb_array = np.zeros((T, N), dtype='int32') + + # get the lower edges of the bins for every time series + edges = np.sort(data, axis=0)[::bin_edge, :].T + bins = edges.shape[1] + + # This gives the symbolic time series + symb_array = (data.reshape(T, N, 1) >= edges.reshape(1, N, bins)).sum( + axis=2) - 1 + + return symb_array.astype('int32')
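A small, illustrative sketch of equal-quantile symbolification into four bins:

    import numpy as np

    data = np.random.randn(200, 3)
    symb = quantile_bin_array(data, bins=4)
    print(symb.dtype, symb.min(), symb.max())  # int32 0 3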
+ +
[docs]def var_process(parents_neighbors_coeffs, T=1000, use='inv_inno_cov', + verbosity=0, initial_values=None): + """Returns a vector-autoregressive process with correlated innovations. + + Wrapper around var_network with possibly more user-friendly input options. + + DEPRECATED. Will be removed in future. + """ + print("data generating models are now in toymodels folder: " + "from tigramite.toymodels import structural_causal_processes as toys.") + return None
+ +
[docs]def structural_causal_process(links, T, noises=None, + intervention=None, intervention_type='hard', + seed=None): + """Returns a structural causal process with contemporaneous and lagged + dependencies. + + DEPRECATED. Will be removed in future. + """ + print("data generating models are now in toymodels folder: " + "from tigramite.toymodels import structural_causal_processes as toys.") + return None
+ + +if __name__ == '__main__': + + ## Generate some time series from a structural causal process + # The module-level structural_causal_process above is deprecated and returns + # None, so the toymodels implementation is used here. + from tigramite.toymodels import structural_causal_processes as toys + + def lin_f(x): return x + def nonlin_f(x): return (x + 5. * x**2 * np.exp(-x**2 / 20.)) + + links = {0: [((0, -1), 0.9, lin_f)], + 1: [((1, -1), 0.8, lin_f), ((0, -1), 0.3, nonlin_f)], + 2: [((2, -1), 0.7, lin_f), ((1, 0), -0.2, lin_f)], + } + noises = [np.random.randn, np.random.randn, np.random.randn] + data, nonstat = toys.structural_causal_process(links, + T=100, noises=noises) + print(data.shape) +
+ +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/_build/html/_modules/tigramite/independence_tests/cmiknn.html b/docs/_build/html/_modules/tigramite/independence_tests/cmiknn.html new file mode 100644 index 00000000..2c5aceab --- /dev/null +++ b/docs/_build/html/_modules/tigramite/independence_tests/cmiknn.html @@ -0,0 +1,529 @@ + + + + + + + tigramite.independence_tests.cmiknn — Tigramite 5.0 documentation + + + + + + + + + + + + + +
+
+
+
+ +

Source code for tigramite.independence_tests.cmiknn

+"""Tigramite causal discovery for time series."""
+
+# Author: Jakob Runge <jakob@jakob-runge.com>
+#
+# License: GNU General Public License v3.0
+
+from __future__ import print_function
+from scipy import special, spatial
+import numpy as np
+from .independence_tests_base import CondIndTest
+from numba import jit
+
+
+
[docs]class CMIknn(CondIndTest): + r"""Conditional mutual information test based on nearest-neighbor estimator. + + Conditional mutual information is the most general dependency measure coming + from an information-theoretic framework. It makes no assumptions about the + parametric form of the dependencies by directly estimating the underlying + joint density. The test here is based on the estimator in S. Frenzel and B. + Pompe, Phys. Rev. Lett. 99, 204101 (2007), combined with a shuffle test to + generate the distribution under the null hypothesis of independence first + used in [3]_. The knn-estimator is suitable only for variables taking a + continuous range of values. For discrete variables use the CMIsymb class. + + Notes + ----- + CMI is given by + + .. math:: I(X;Y|Z) &= \int p(z) \iint p(x,y|z) \log + \frac{ p(x,y |z)}{p(x|z)\cdot p(y |z)} \,dx dy dz + + Its knn-estimator is given by + + .. math:: \widehat{I}(X;Y|Z) &= \psi (k) + \frac{1}{T} \sum_{t=1}^T + \left[ \psi(k_{Z,t}) - \psi(k_{XZ,t}) - \psi(k_{YZ,t}) \right] + + where :math:`\psi` is the Digamma function. This estimator has as a + parameter the number of nearest-neighbors :math:`k` which determines the + size of hyper-cubes around each (high-dimensional) sample point. Then + :math:`k_{Z,},k_{XZ},k_{YZ}` are the numbers of neighbors in the respective + subspaces. + + :math:`k` can be viewed as a density smoothing parameter (although it is + data-adaptive unlike fixed-bandwidth estimators). For large :math:`k`, the + underlying dependencies are more smoothed and CMI has a larger bias, + but lower variance, which is more important for significance testing. Note + that the estimated CMI values can be slightly negative while CMI is a non- + negative quantity. + + This method requires the scipy.spatial.cKDTree package. + + References + ---------- + + .. [3] J. Runge (2018): Conditional Independence Testing Based on a + Nearest-Neighbor Estimator of Conditional Mutual Information. + In Proceedings of the 21st International Conference on Artificial + Intelligence and Statistics. + http://proceedings.mlr.press/v84/runge18a.html + + Parameters + ---------- + knn : int or float, optional (default: 0.2) + Number of nearest-neighbors which determines the size of hyper-cubes + around each (high-dimensional) sample point. If smaller than 1, this is + computed as a fraction of T, hence knn=knn*T. For knn larger or equal to + 1, this is the absolute number. + + shuffle_neighbors : int, optional (default: 10) + Number of nearest-neighbors within Z for the shuffle surrogates which + determines the size of hyper-cubes around each (high-dimensional) sample + point. + + transform : {'ranks', 'standardize', 'uniform', False}, optional + (default: 'ranks') + Whether to transform the array beforehand by standardizing + or transforming to uniform marginals. + + workers : int (optional, default = -1) + Number of workers to use for parallel processing. If -1 is given + all processors are used. Default: 1. + + significance : str, optional (default: 'shuffle_test') + Type of significance test to use. For CMIknn only 'fixed_thres' and + 'shuffle_test' are available. + + **kwargs : + Arguments passed on to parent class CondIndTest. 
+ """ + @property + def measure(self): + """ + Concrete property to return the measure of the independence test + """ + return self._measure + + def __init__(self, + knn=0.2, + shuffle_neighbors=5, + significance='shuffle_test', + transform='ranks', + workers=-1, + **kwargs): + # Set the member variables + self.knn = knn + self.shuffle_neighbors = shuffle_neighbors + self.transform = transform + self._measure = 'cmi_knn' + self.two_sided = False + self.residual_based = False + self.recycle_residuals = False + self.workers = workers + # Call the parent constructor + CondIndTest.__init__(self, significance=significance, **kwargs) + # Print some information about construction + if self.verbosity > 0: + if self.knn < 1: + print("knn/T = %s" % self.knn) + else: + print("knn = %s" % self.knn) + print("shuffle_neighbors = %d\n" % self.shuffle_neighbors) + + @jit(forceobj=True) + def _get_nearest_neighbors(self, array, xyz, knn): + """Returns nearest neighbors according to Frenzel and Pompe (2007). + + Retrieves the distances eps to the k-th nearest neighbors for every + sample in joint space XYZ and returns the numbers of nearest neighbors + within eps in subspaces Z, XZ, YZ. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + knn : int or float + Number of nearest-neighbors which determines the size of hyper-cubes + around each (high-dimensional) sample point. If smaller than 1, this + is computed as a fraction of T, hence knn=knn*T. For knn larger or + equal to 1, this is the absolute number. + + Returns + ------- + k_xz, k_yz, k_z : tuple of arrays of shape (T,) + Nearest neighbors in subspaces. + """ + + array = array.astype(np.float64) + xyz = xyz.astype(np.int32) + + dim, T = array.shape + + # Add noise to destroy ties... 
+ array += (1E-6 * array.std(axis=1).reshape(dim, 1) + * self.random_state.random((array.shape[0], array.shape[1]))) + + if self.transform == 'standardize': + # Standardize + array = array.astype(np.float64) + array -= array.mean(axis=1).reshape(dim, 1) + array /= array.std(axis=1).reshape(dim, 1) + # FIXME: If the time series is constant, return nan rather than + # raising Exception + if np.isnan(array).sum() != 0: + raise ValueError("nans after standardizing, " + "possibly constant array!") + elif self.transform == 'uniform': + array = self._trafo2uniform(array) + elif self.transform == 'ranks': + array = array.argsort(axis=1).argsort(axis=1).astype(np.float64) + + array = array.T + tree_xyz = spatial.cKDTree(array) + epsarray = tree_xyz.query(array, k=[knn+1], p=np.inf, + eps=0., workers=self.workers)[0][:, 0].astype(np.float64) + + # To search neighbors < eps + epsarray = np.multiply(epsarray, 0.99999) + + # Subsample indices + x_indices = np.where(xyz == 0)[0] + y_indices = np.where(xyz == 1)[0] + z_indices = np.where(xyz == 2)[0] + + # Find nearest neighbors in subspaces + xz = array[:, np.concatenate((x_indices, z_indices))] + tree_xz = spatial.cKDTree(xz) + k_xz = tree_xz.query_ball_point(xz, r=epsarray, eps=0., p=np.inf, workers=self.workers, return_length=True) + + yz = array[:, np.concatenate((y_indices, z_indices))] + tree_yz = spatial.cKDTree(yz) + k_yz = tree_yz.query_ball_point(yz, r=epsarray, eps=0., p=np.inf, workers=self.workers, return_length=True) + + if len(z_indices) > 0: + z = array[:, z_indices] + tree_z = spatial.cKDTree(z) + k_z = tree_z.query_ball_point(z, r=epsarray, eps=0., p=np.inf, workers=self.workers, return_length=True) + else: + # Number of neighbors is T when z is empty. + k_z = np.full(T, T, dtype=np.float64) + + return k_xz, k_yz, k_z + +
[docs] def get_dependence_measure(self, array, xyz): + """Returns CMI estimate as described in Frenzel and Pompe PRL (2007). + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + Returns + ------- + val : float + Conditional mutual information estimate. + """ + + dim, T = array.shape + + if self.knn < 1: + knn_here = max(1, int(self.knn*T)) + else: + knn_here = max(1, int(self.knn)) + + + k_xz, k_yz, k_z = self._get_nearest_neighbors(array=array, + xyz=xyz, + knn=knn_here) + + val = special.digamma(knn_here) - (special.digamma(k_xz) + + special.digamma(k_yz) - + special.digamma(k_z)).mean() + + return val
+ + +
[docs] def get_shuffle_significance(self, array, xyz, value, + return_null_dist=False): + """Returns p-value for nearest-neighbor shuffle significance test. + + For non-empty Z, overwrites get_shuffle_significance from the parent + class which is a block shuffle test, which does not preserve + dependencies of X and Y with Z. Here the parameter shuffle_neighbors is + used to permute only those values :math:`x_i` and :math:`x_j` for which + :math:`z_j` is among the nearest niehgbors of :math:`z_i`. If Z is + empty, the block-shuffle test is used. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + value : number + Value of test statistic for unshuffled estimate. + + Returns + ------- + pval : float + p-value + """ + dim, T = array.shape + + # Skip shuffle test if value is above threshold + # if value > self.minimum threshold: + # if return_null_dist: + # return 0., None + # else: + # return 0. + + # max_neighbors = max(1, int(max_neighbor_ratio*T)) + x_indices = np.where(xyz == 0)[0] + z_indices = np.where(xyz == 2)[0] + + if len(z_indices) > 0 and self.shuffle_neighbors < T: + if self.verbosity > 2: + print(" nearest-neighbor shuffle significance " + "test with n = %d and %d surrogates" % ( + self.shuffle_neighbors, self.sig_samples)) + + # Get nearest neighbors around each sample point in Z + z_array = np.fastCopyAndTranspose(array[z_indices, :]) + tree_xyz = spatial.cKDTree(z_array) + neighbors = tree_xyz.query(z_array, + k=self.shuffle_neighbors, + p=np.inf, + eps=0.)[1].astype(np.int32) + + null_dist = np.zeros(self.sig_samples) + for sam in range(self.sig_samples): + + # Generate random order in which to go through indices loop in + # next step + order = self.random_state.permutation(T).astype(np.int32) + + # Shuffle neighbor indices for each sample index + for i in range(len(neighbors)): + self.random_state.shuffle(neighbors[i]) + # neighbors = self.random_state.permuted(neighbors, axis=1) + + # Select a series of neighbor indices that contains as few as + # possible duplicates + restricted_permutation = self.get_restricted_permutation( + T=T, + shuffle_neighbors=self.shuffle_neighbors, + neighbors=neighbors, + order=order) + + array_shuffled = np.copy(array) + for i in x_indices: + array_shuffled[i] = array[i, restricted_permutation] + + null_dist[sam] = self.get_dependence_measure(array_shuffled, + xyz) + + else: + null_dist = \ + self._get_shuffle_dist(array, xyz, + self.get_dependence_measure, + sig_samples=self.sig_samples, + sig_blocklength=self.sig_blocklength, + verbosity=self.verbosity) + + pval = (null_dist >= value).mean() + + if return_null_dist: + # Sort + null_dist.sort() + return pval, null_dist + return pval
+ + +
[docs] def get_conditional_entropy(self, array, xyz): + """Returns the nearest-neighbor conditional entropy estimate of H(X|Y). + + Parameters + ---------- + array : array-like + data array with X, Y in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). Here only uses 0 for X and + 1 for Y. + + Returns + ------- + val : float + Entropy estimate. + """ + + + dim, T = array.shape + + if self.knn < 1: + knn_here = max(1, int(self.knn*T)) + else: + knn_here = max(1, int(self.knn)) + + + array = array.astype(np.float64) + + # Add noise to destroy ties... + array += (1E-6 * array.std(axis=1).reshape(dim, 1) + * np.random.rand(array.shape[0], array.shape[1])) + + if self.transform == 'standardize': + # Standardize + array = array.astype(np.float64) + array -= array.mean(axis=1).reshape(dim, 1) + array /= array.std(axis=1).reshape(dim, 1) + # FIXME: If the time series is constant, return nan rather than + # raising Exception + if np.isnan(array).sum() != 0: + raise ValueError("nans after standardizing, " + "possibly constant array!") + elif self.transform == 'uniform': + array = self._trafo2uniform(array) + elif self.transform == 'ranks': + array = array.argsort(axis=1).argsort(axis=1).astype(np.float64) + + # Compute conditional entropy as H(X|Y) = H(X) - I(X;Y) + + # First compute H(X) + # Use cKDTree to get distances eps to the k-th nearest neighbors for + # every sample in joint space X with maximum norm + x_indices = np.where(xyz == 0)[0] + y_indices = np.where(xyz == 1)[0] + + dim_x = int(np.where(xyz == 0)[0][-1] + 1) + if 1 in xyz: + dim_y = int(np.where(xyz == 1)[0][-1] + 1 - dim_x) + else: + dim_y = 0 + + + x_array = np.fastCopyAndTranspose(array[x_indices, :]) + tree_xyz = spatial.cKDTree(x_array) + epsarray = tree_xyz.query(x_array, k=[knn_here+1], p=np.inf, + eps=0., workers=self.workers)[0][:, 0].astype(np.float64) + + h_x = - special.digamma(knn_here) + special.digamma(T) + dim_x * np.log(2.*epsarray).mean() + + # Then compute MI(X;Y) + if dim_y > 0: + xyz_here = np.array([index for index in xyz if index == 0 or index == 1]) + array_xy = array[list(x_indices) + list(y_indices), :] + i_xy = self.get_dependence_measure(array_xy, xyz_here) + else: + i_xy = 0. + + h_x_y = h_x - i_xy + + return h_x_y
+ + + @jit(forceobj=True) + def get_restricted_permutation(self, T, shuffle_neighbors, neighbors, order): + + restricted_permutation = np.zeros(T, dtype=np.int32) + used = np.array([], dtype=np.int32) + + for sample_index in order: + m = 0 + use = neighbors[sample_index, m] + + while ((use in used) and (m < shuffle_neighbors - 1)): + m += 1 + use = neighbors[sample_index, m] + + restricted_permutation[sample_index] = use + used = np.append(used, use) + + return restricted_permutation
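A hedged usage sketch of the class above: the import path and constructor arguments are assumptions based on the attributes referenced in these methods (knn, shuffle_neighbors, sig_samples), and exact defaults may differ; get_dependence_measure takes the data array with variables in rows and the xyz identifier marking X (0), Y (1) and Z (2) rows, as documented above.

# Hedged usage sketch of CMIknn (assumed import path and constructor arguments).
import numpy as np
from tigramite.independence_tests import CMIknn

rng = np.random.default_rng(42)
T = 500
z = rng.standard_normal(T)
x = z + 0.5 * rng.standard_normal(T)
y = z + 0.5 * rng.standard_normal(T)          # X and Y are independent given Z

array = np.vstack([x, y, z])                  # rows: X, Y, Z; columns: samples
xyz = np.array([0, 1, 2])                     # identifier array as used above

cmi = CMIknn(knn=0.1, shuffle_neighbors=5, sig_samples=500)
val = cmi.get_dependence_measure(array, xyz)
print(val)                                    # should be close to zero here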
+
+ +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/_build/html/_modules/tigramite/independence_tests/cmisymb.html b/docs/_build/html/_modules/tigramite/independence_tests/cmisymb.html new file mode 100644 index 00000000..924062ac --- /dev/null +++ b/docs/_build/html/_modules/tigramite/independence_tests/cmisymb.html @@ -0,0 +1,305 @@ + + + + + + + tigramite.independence_tests.cmisymb — Tigramite 5.0 documentation + + + + + + + + + + + + + +
+
+
+
+ +

Source code for tigramite.independence_tests.cmisymb

+"""Tigramite causal discovery for time series."""
+
+# Author: Jakob Runge <jakob@jakob-runge.com>
+#
+# License: GNU General Public License v3.0
+
+from __future__ import print_function
+import warnings
+import numpy as np
+
+from .independence_tests_base import CondIndTest
+
+
[docs]class CMIsymb(CondIndTest):
+    r"""Conditional mutual information test based on discrete estimator.
+
+    Conditional mutual information is the most general dependency measure
+    coming from an information-theoretic framework. It makes no assumptions
+    about the parametric form of the dependencies by directly estimating the
+    underlying joint density. The test here is based on directly estimating
+    the joint distribution assuming symbolic input, combined with a
+    shuffle test to generate the distribution under the null hypothesis of
+    independence. This estimator is suitable only for discrete variables.
+    For continuous variables, either pre-process the data using the functions
+    in data_processing or, better, use the CMIknn class.
+
+    Notes
+    -----
+    CMI and its estimator are given by
+
+    .. math:: I(X;Y|Z) = \sum_z p(z) \sum_x \sum_y p(x,y|z) \log
+                \frac{p(x,y|z)}{p(x|z)\cdot p(y|z)}
+
+    Parameters
+    ----------
+    n_symbs : int, optional (default: None)
+        Number of symbols in input data. Should be at least as large as the
+        maximum array entry + 1. If None, n_symbs is based on the
+        maximum value in the array (array.max() + 1).
+
+    significance : str, optional (default: 'shuffle_test')
+        Type of significance test to use. For CMIsymb only 'fixed_thres' and
+        'shuffle_test' are available.
+
+    sig_blocklength : int, optional (default: 1)
+        Block length for block-shuffle significance test.
+
+    conf_blocklength : int, optional (default: 1)
+        Block length for block-bootstrap.
+
+    **kwargs :
+        Arguments passed on to parent class CondIndTest.
+    """
+    @property
+    def measure(self):
+        """
+        Concrete property to return the measure of the independence test
+        """
+        return self._measure
+
+    def __init__(self,
+                 n_symbs=None,
+                 significance='shuffle_test',
+                 sig_blocklength=1,
+                 conf_blocklength=1,
+                 **kwargs):
+        # Setup the member variables
+        self._measure = 'cmi_symb'
+        self.two_sided = False
+        self.residual_based = False
+        self.recycle_residuals = False
+        self.n_symbs = n_symbs
+        # Call the parent constructor
+        CondIndTest.__init__(self,
+                             significance=significance,
+                             sig_blocklength=sig_blocklength,
+                             conf_blocklength=conf_blocklength,
+                             **kwargs)
+
+        if self.verbosity > 0:
+            print("n_symbs = %s" % self.n_symbs)
+            print("")
+
+        if self.conf_blocklength is None or self.sig_blocklength is None:
+            warnings.warn("Automatic block-length estimations from decay of "
+                          "autocorrelation may not be sensible for discrete "
+                          "data")
+
+    def _bincount_hist(self, symb_array, weights=None):
+        """Computes histogram from symbolic array.
+
+        The maximum of the symbolic array determines the alphabet / number
+        of bins.
+
+        Parameters
+        ----------
+        symb_array : integer array
+            Data array of shape (dim, T).
+
+        weights : float array, optional (default: None)
+            Optional weights array of shape (dim, T).
+
+        Returns
+        -------
+        hist : array
+            Histogram array of shape (base, base, base, ...)*number of
+            dimensions with Z-dimensions coming first.
+        """
+
+        if self.n_symbs is None:
+            n_symbs = int(symb_array.max() + 1)
+        else:
+            n_symbs = self.n_symbs
+            if n_symbs < int(symb_array.max() + 1):
+                raise ValueError("n_symbs must be >= symb_array.max() + 1 = {}".format(symb_array.max() + 1))
+
+        if 'int' not in str(symb_array.dtype):
+            raise ValueError("Input data must be of integer type, where each "
+                             "number indexes a symbol.")
+
+        dim, T = symb_array.shape
+
+        flathist = np.zeros((n_symbs ** dim), dtype='int16')
+        multisymb = np.zeros(T, dtype='int64')
+        if weights is not None:
+            flathist = np.zeros((n_symbs ** dim), dtype='float32')
+            multiweights = np.ones(T, dtype='float32')
+
+        for i in range(dim):
+            multisymb += symb_array[i, :] * n_symbs ** i
+            if weights is not None:
+                multiweights *= weights[i, :]
+
+        if weights is None:
+            result = np.bincount(multisymb)
+        else:
+            result = (np.bincount(multisymb, weights=multiweights)
+                      / multiweights.sum())
+
+        flathist[:len(result)] += result
+
+        hist = flathist.reshape(tuple([n_symbs, n_symbs] +
+                                      [n_symbs for i in range(dim - 2)])).T
+
+        return hist
+
[docs] def get_dependence_measure(self, array, xyz): + """Returns CMI estimate based on bincount histogram. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + Returns + ------- + val : float + Conditional mutual information estimate. + """ + + _, T = array.shape + + # High-dimensional histogram + hist = self._bincount_hist(array, weights=None) + + def _plogp_vector(T): + """Precalculation of p*log(p) needed for entropies.""" + gfunc = np.zeros(T + 1) + data = np.arange(1, T + 1, 1) + gfunc[1:] = data * np.log(data) + def plogp_func(time): + return gfunc[time] + return np.vectorize(plogp_func) + + plogp = _plogp_vector(T) + hxyz = (-(plogp(hist)).sum() + plogp(T)) / float(T) + hxz = (-(plogp(hist.sum(axis=1))).sum() + plogp(T)) / float(T) + hyz = (-(plogp(hist.sum(axis=0))).sum() + plogp(T)) / float(T) + hz = (-(plogp(hist.sum(axis=0).sum(axis=0))).sum()+plogp(T)) / float(T) + val = hxz + hyz - hz - hxyz + return val
+ +
[docs] def get_shuffle_significance(self, array, xyz, value, + return_null_dist=False): + """Returns p-value for shuffle significance test. + + For residual-based test statistics only the residuals are shuffled. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + value : number + Value of test statistic for unshuffled estimate. + + Returns + ------- + pval : float + p-value + """ + + null_dist = self._get_shuffle_dist(array, xyz, + self.get_dependence_measure, + sig_samples=self.sig_samples, + sig_blocklength=self.sig_blocklength, + verbosity=self.verbosity) + + pval = (null_dist >= value).mean() + + if return_null_dist: + return pval, null_dist + return pval
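The entropy combination used above, I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z), can be reproduced in a few lines. A self-contained sketch on made-up discrete data (illustration only, independent of the class):

# Self-contained sketch: plug-in CMI estimate from a discrete (X, Y, Z) sample,
# I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z), mirroring the bincount-based
# estimator above (illustration only).
import numpy as np

def entropy(counts):
    p = counts[counts > 0] / counts.sum()
    return -(p * np.log(p)).sum()

rng = np.random.default_rng(0)
T = 10000
z = rng.integers(0, 2, T)
x = (z + rng.integers(0, 2, T)) % 2
y = (z + rng.integers(0, 2, T)) % 2          # X and Y only share information through Z

n_symbs = 2
hist = np.zeros((n_symbs, n_symbs, n_symbs))
np.add.at(hist, (x, y, z), 1)                # joint histogram over (X, Y, Z)

h_xyz = entropy(hist.ravel())
h_xz = entropy(hist.sum(axis=1).ravel())     # marginalize out Y
h_yz = entropy(hist.sum(axis=0).ravel())     # marginalize out X
h_z = entropy(hist.sum(axis=(0, 1)))
val = h_xz + h_yz - h_z - h_xyz
print(val)                                   # approximately zero here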
+
+ +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/_build/html/_modules/tigramite/independence_tests/gpdc.html b/docs/_build/html/_modules/tigramite/independence_tests/gpdc.html new file mode 100644 index 00000000..310cdfef --- /dev/null +++ b/docs/_build/html/_modules/tigramite/independence_tests/gpdc.html @@ -0,0 +1,742 @@ + + + + + + + tigramite.independence_tests.gpdc — Tigramite 5.0 documentation + + + + + + + + + + + + + +
+
+
+
+ +

Source code for tigramite.independence_tests.gpdc

+"""Tigramite causal discovery for time series."""
+
+# Author: Jakob Runge <jakob@jakob-runge.com>
+#
+# License: GNU General Public License v3.0
+
+from __future__ import print_function
+import numpy as np
+import warnings
+from .independence_tests_base import CondIndTest
+
+try:
+    import dcor
+    from sklearn import gaussian_process
+except Exception as e:
+    warnings.warn(str(e))
+
+class GaussProcReg():
+    r"""Gaussian processes abstract base class.
+
+    GP is estimated with scikit-learn and allows one to flexibly specify kernels and
+    hyperparameters or let them be optimized automatically. The kernel specifies
+    the covariance function of the GP. Parameters can be passed on to
+    ``GaussianProcessRegressor`` using the gp_params dictionary. If None is
+    passed, the kernel '1.0 * RBF(1.0) + WhiteKernel()' is used with alpha=0 as
+    default. Note that the kernel's hyperparameters are optimized during
+    fitting.
+
+    The null distribution is not analytically available, but it can be
+    precomputed with the function generate_and_save_nulldists(...), which saves
+    a \*.npz file containing the null distribution for different sample sizes.
+    This file can then be supplied as null_dist_filename.
+
+    Parameters
+    ----------
+    null_samples : int
+        Number of null samples to use
+
+    cond_ind_test : CondIndTest
+        Conditional independence test that this Gaussian Process Regressor will
+        calculate the null distribution for.  This is used to grab the
+        get_dependence_measure function.
+
+    gp_params : dictionary, optional (default: None)
+        Dictionary with parameters for ``GaussianProcessRegressor``.
+
+    null_dist_filename : str, optional (default: None)
+        Path to file containing null distribution.
+
+    verbosity : int, optional (default: 0)
+        Level of verbosity.
+    """
+    def __init__(self,
+                 null_samples,
+                 cond_ind_test,
+                 gp_params=None,
+                 null_dist_filename=None,
+                 verbosity=0):
+        # Set the dependence measure function
+        self.cond_ind_test = cond_ind_test
+        # Set member variables
+        self.gp_params = gp_params
+        self.verbosity = verbosity
+        # Set the null distribution defaults
+        self.null_samples = null_samples
+        self.null_dists = {}
+        self.null_dist_filename = null_dist_filename
+        # Check if we are loading a null distribution from a cached file
+        if self.null_dist_filename is not None:
+            self.null_dists, self.null_samples = \
+                    self._load_nulldist(self.null_dist_filename)
+
+    def _load_nulldist(self, filename):
+        r"""
+        Load a precomputed null distribution from a \*.npz file.  This
+        distribution can be calculated using generate_and_save_nulldists(...).
+
+        Parameters
+        ----------
+        filename : str
+            Path to the \*.npz file
+
+        Returns
+        -------
+        null_dists, null_samples : dict, int
+            The null distribution as a dictionary of distributions keyed by
+            sample size, the number of null samples in total.
+        """
+        null_dist_file = np.load(filename)
+        null_dists = dict(zip(null_dist_file['T'],
+                              null_dist_file['exact_dist']))
+        null_samples = len(null_dist_file['exact_dist'][0])
+        return null_dists, null_samples
+
+    def _generate_nulldist(self, df,
+                           add_to_null_dists=True):
+        """Generates null distribution for pairwise independence tests.
+
+        Generates the null distribution for sample size df. Assumes pairwise
+        samples transformed to uniform marginals. Uses get_dependence_measure
+        available in class and generates self.sig_samples random samples. Adds
+        the null distributions to self.null_dists.
+
+        Parameters
+        ----------
+        df : int
+            Degrees of freedom / sample size to generate null distribution for.
+        add_to_null_dists : bool, optional (default: True)
+            Whether to add the null dist to the dictionary of null dists or
+            just return it.
+
+        Returns
+        -------
+        null_dist : array of shape [df,]
+            Only returned, if add_to_null_dists is False.
+        """
+
+        if self.verbosity > 0:
+            print("Generating null distribution for df = %d. " % df)
+            if add_to_null_dists:
+                print("For faster computations, run function "
+                      "generate_and_save_nulldists(...) to "
+                      "precompute null distribution and load *.npz file with "
+                      "argument null_dist_filename")
+
+        xyz = np.array([0,1])
+
+        null_dist = np.zeros(self.null_samples)
+        for i in range(self.null_samples):
+            array = self.cond_ind_test.random_state.random((2, df))
+            null_dist[i] = self.cond_ind_test.get_dependence_measure(array, xyz)
+
+        null_dist.sort()
+        if add_to_null_dists:
+            self.null_dists[df] = null_dist
+        return null_dist
+
+    def _generate_and_save_nulldists(self, sample_sizes, null_dist_filename):
+        """Generates and saves null distribution for pairwise independence
+        tests.
+
+        Generates the null distribution for different sample sizes. Calls
+        generate_nulldist. Null dists are saved to disk as
+        self.null_dist_filename.npz. Also adds the null distributions to
+        self.null_dists.
+
+        Parameters
+        ----------
+        sample_sizes : list
+            List of sample sizes.
+
+        null_dist_filename : str
+            Name to save file containing null distributions.
+        """
+
+        self.null_dist_filename = null_dist_filename
+
+        null_dists = np.zeros((len(sample_sizes), self.null_samples))
+
+        for iT, T in enumerate(sample_sizes):
+            null_dists[iT] = self._generate_nulldist(T, add_to_null_dists=False)
+            self.null_dists[T] = null_dists[iT]
+
+        np.savez("%s" % null_dist_filename,
+                 exact_dist=null_dists,
+                 T=np.array(sample_sizes))
+
+    def _get_single_residuals(self, array, target_var,
+                              return_means=False,
+                              standardize=True,
+                              return_likelihood=False):
+        """Returns residuals of Gaussian process regression.
+
+        Performs a GP regression of the variable indexed by target_var on the
+        conditions Z. Here array is assumed to contain X and Y as the first two
+        rows with the remaining rows (if present) containing the conditions Z.
+        Optionally returns the estimated mean and the likelihood.
+
+        Parameters
+        ----------
+        array : array-like
+            data array with X, Y, Z in rows and observations in columns
+
+        target_var : {0, 1}
+            Variable to regress out conditions from.
+
+        standardize : bool, optional (default: True)
+            Whether to standardize the array beforehand.
+
+        return_means : bool, optional (default: False)
+            Whether to return the estimated regression line.
+
+        return_likelihood : bool, optional (default: False)
+            Whether to return the log_marginal_likelihood of the fitted GP
+
+        Returns
+        -------
+        resid [, mean, likelihood] : array-like
+            The residual of the regression and optionally the estimated mean
+            and/or the likelihood.
+        """
+        dim, T = array.shape
+
+        if self.gp_params is None:
+            self.gp_params = {}
+
+        if dim <= 2:
+            if return_likelihood:
+                return array[target_var, :], -np.inf
+            return array[target_var, :]
+
+        # Standardize
+        if standardize:
+            array -= array.mean(axis=1).reshape(dim, 1)
+            array /= array.std(axis=1).reshape(dim, 1)
+            if np.isnan(array).sum() != 0:
+                raise ValueError("nans after standardizing, "
+                                 "possibly constant array!")
+
+        target_series = array[target_var, :]
+        z = np.fastCopyAndTranspose(array[2:])
+        if np.ndim(z) == 1:
+            z = z.reshape(-1, 1)
+
+
+        # Overwrite default kernel and alpha values
+        params = self.gp_params.copy()
+        if 'kernel' not in list(self.gp_params):
+            kernel = gaussian_process.kernels.RBF() +\
+             gaussian_process.kernels.WhiteKernel()
+        else:
+            kernel = self.gp_params['kernel']
+            del params['kernel']
+
+        if 'alpha' not in list(self.gp_params):
+            alpha = 0.
+        else:
+            alpha = self.gp_params['alpha']
+            del params['alpha']
+
+        gp = gaussian_process.GaussianProcessRegressor(kernel=kernel,
+                                               alpha=alpha,
+                                               **params)
+
+        gp.fit(z, target_series.reshape(-1, 1))
+
+        if self.verbosity > 3:
+            print(kernel, alpha, gp.kernel_, gp.alpha)
+
+        if return_likelihood:
+            likelihood = gp.log_marginal_likelihood()
+
+        mean = gp.predict(z).squeeze()
+
+        resid = target_series - mean
+
+        if return_means and not return_likelihood:
+            return (resid, mean)
+        elif return_likelihood and not return_means:
+            return (resid, likelihood)
+        elif return_means and return_likelihood:
+            return resid, mean, likelihood
+        return resid
+
+    def _get_model_selection_criterion(self, j, parents, tau_max=0):
+        """Returns log marginal likelihood for GP regression.
+
+        Fits a GP model of the parents to variable j and returns the negative
+        log marginal likelihood as a model selection score. Is used to determine
+        optimal hyperparameters in PCMCI, in particular the pc_alpha value.
+
+        Parameters
+        ----------
+        j : int
+            Index of target variable in data array.
+
+        parents : list
+            List of form [(0, -1), (3, -2), ...] containing parents.
+
+        tau_max : int, optional (default: 0)
+            Maximum time lag. This may be used to make sure that estimates for
+            different lags in X, Z, all have the same sample size.
+
+        Returns:
+        score : float
+            Model score.
+        """
+
+        Y = [(j, 0)]
+        X = [(j, 0)]   # dummy variable here
+        Z = parents
+        array, xyz = \
+                self.cond_ind_test.dataframe.construct_array(
+                    X=X, Y=Y, Z=Z,
+                    tau_max=tau_max,
+                    mask_type=self.cond_ind_test.mask_type,
+                    return_cleaned_xyz=False,
+                    do_checks=True,
+                    verbosity=self.verbosity)
+
+        dim, T = array.shape
+
+        _, logli = self._get_single_residuals(array,
+                                              target_var=1,
+                                              return_likelihood=True)
+
+        score = -logli
+        return score
+
+
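The regression step wrapped by GaussProcReg._get_single_residuals above follows a standard scikit-learn pattern. A stand-alone sketch with the same default kernel (illustration only; the class additionally standardizes the array and handles bookkeeping):

# Stand-alone sketch of the GP regression step used above: regress a target on
# the conditions Z with an RBF + white-noise kernel and keep the residuals
# (illustration only).
import numpy as np
from sklearn import gaussian_process

rng = np.random.default_rng(1)
T = 200
z = rng.standard_normal((T, 1))
target = np.sin(z[:, 0]) + 0.1 * rng.standard_normal(T)

kernel = gaussian_process.kernels.RBF() + gaussian_process.kernels.WhiteKernel()
gp = gaussian_process.GaussianProcessRegressor(kernel=kernel, alpha=0.)
gp.fit(z, target.reshape(-1, 1))

mean = gp.predict(z).squeeze()
resid = target - mean                          # residuals used for the distance correlation test
print(resid.std())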
[docs]class GPDC(CondIndTest):
+    r"""GPDC conditional independence test based on Gaussian processes and
+        distance correlation.
+
+    GPDC is based on a Gaussian process (GP) regression and a distance
+    correlation test on the residuals [2]_. GP is estimated with scikit-learn
+    and allows one to flexibly specify kernels and hyperparameters or let them
+    be optimized automatically. The distance correlation test is implemented
+    with the dcor package available from pip. Here the null distribution is
+    not analytically available, but can be precomputed with the function
+    generate_and_save_nulldists(...), which saves a \*.npz file containing the
+    null distribution for different sample sizes. This file can then be
+    supplied as null_dist_filename.
+
+    Notes
+    -----
+
+    GPDC is based on a Gaussian process (GP) regression and a distance
+    correlation test on the residuals. Distance correlation is described in
+    [2]_. To test :math:`X \perp Y | Z`, first :math:`Z` is regressed out from
+    :math:`X` and :math:`Y` assuming the model
+
+    .. math::  X & =  f_X(Z) + \epsilon_{X} \\
+        Y & =  f_Y(Z) + \epsilon_{Y}  \\
+        \epsilon_{X,Y} &\sim \mathcal{N}(0, \sigma^2)
+
+    using GP regression. Here :math:`\sigma^2` and the kernel bandwidth are
+    optimized using ``sklearn``. Then the residuals are transformed to uniform
+    marginals yielding :math:`r_X,r_Y` and their dependency is tested with
+
+    .. math::  \mathcal{R}\left(r_X, r_Y\right)
+
+    The null distribution of the distance correlation should be pre-computed.
+    Otherwise it is computed during runtime.
+
+    References
+    ----------
+    .. [2] Gabor J. Szekely, Maria L. Rizzo, and Nail K. Bakirov: Measuring and
+           testing dependence by correlation of distances,
+           https://arxiv.org/abs/0803.4101
+
+    Parameters
+    ----------
+    null_dist_filename : str, optional (default: None)
+        Path to file containing null distribution.
+
+    gp_params : dictionary, optional (default: None)
+        Dictionary with parameters for ``GaussianProcessRegressor``.
+
+    **kwargs :
+        Arguments passed on to parent class GaussProcReg.
+
+    """
+    @property
+    def measure(self):
+        """
+        Concrete property to return the measure of the independence test
+        """
+        return self._measure
+
+    def __init__(self,
+                 null_dist_filename=None,
+                 gp_params=None,
+                 **kwargs):
+        self._measure = 'gp_dc'
+        self.two_sided = False
+        self.residual_based = True
+        # Call the parent constructor
+        CondIndTest.__init__(self, **kwargs)
+        # Build the regressor
+        self.gauss_pr = GaussProcReg(self.sig_samples,
+                                     self,
+                                     gp_params=gp_params,
+                                     null_dist_filename=null_dist_filename,
+                                     verbosity=self.verbosity)
+
+        if self.verbosity > 0:
+            print("null_dist_filename = %s" % self.gauss_pr.null_dist_filename)
+            if self.gauss_pr.gp_params is not None:
+                for key in list(self.gauss_pr.gp_params):
+                    print("%s = %s" % (key, self.gauss_pr.gp_params[key]))
+            print("")
+
+    def _load_nulldist(self, filename):
+        r"""
+        Load a precomputed null distribution from a \*.npz file. This
+        distribution can be calculated using generate_and_save_nulldists(...).
+
+        Parameters
+        ----------
+        filename : str
+            Path to the \*.npz file
+
+        Returns
+        -------
+        null_dists, null_samples : dict, int
+            The null distribution as a dictionary of distributions keyed by
+            sample size, the number of null samples in total.
+        """
+        return self.gauss_pr._load_nulldist(filename)
[docs] def generate_nulldist(self, df, add_to_null_dists=True): + """Generates null distribution for pairwise independence tests. + + Generates the null distribution for sample size df. Assumes pairwise + samples transformed to uniform marginals. Uses get_dependence_measure + available in class and generates self.sig_samples random samples. Adds + the null distributions to self.gauss_pr.null_dists. + + Parameters + ---------- + df : int + Degrees of freedom / sample size to generate null distribution for. + + add_to_null_dists : bool, optional (default: True) + Whether to add the null dist to the dictionary of null dists or + just return it. + + Returns + ------- + null_dist : array of shape [df,] + Only returned,if add_to_null_dists is False. + """ + return self.gauss_pr._generate_nulldist(df, add_to_null_dists)
+ +
[docs] def generate_and_save_nulldists(self, sample_sizes, null_dist_filename): + """Generates and saves null distribution for pairwise independence + tests. + + Generates the null distribution for different sample sizes. Calls + generate_nulldist. Null dists are saved to disk as + self.null_dist_filename.npz. Also adds the null distributions to + self.gauss_pr.null_dists. + + Parameters + ---------- + sample_sizes : list + List of sample sizes. + + null_dist_filename : str + Name to save file containing null distributions. + """ + self.gauss_pr._generate_and_save_nulldists(sample_sizes, + null_dist_filename)
+ + def _get_single_residuals(self, array, target_var, + return_means=False, + standardize=True, + return_likelihood=False): + """Returns residuals of Gaussian process regression. + + Performs a GP regression of the variable indexed by target_var on the + conditions Z. Here array is assumed to contain X and Y as the first two + rows with the remaining rows (if present) containing the conditions Z. + Optionally returns the estimated mean and the likelihood. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + target_var : {0, 1} + Variable to regress out conditions from. + + standardize : bool, optional (default: True) + Whether to standardize the array beforehand. + + return_means : bool, optional (default: False) + Whether to return the estimated regression line. + + return_likelihood : bool, optional (default: False) + Whether to return the log_marginal_likelihood of the fitted GP + + Returns + ------- + resid [, mean, likelihood] : array-like + The residual of the regression and optionally the estimated mean + and/or the likelihood. + """ + return self.gauss_pr._get_single_residuals( + array, target_var, + return_means, + standardize, + return_likelihood) + +
[docs] def get_model_selection_criterion(self, j, parents, tau_max=0): + """Returns log marginal likelihood for GP regression. + + Fits a GP model of the parents to variable j and returns the negative + log marginal likelihood as a model selection score. Is used to determine + optimal hyperparameters in PCMCI, in particular the pc_alpha value. + + Parameters + ---------- + j : int + Index of target variable in data array. + + parents : list + List of form [(0, -1), (3, -2), ...] containing parents. + + tau_max : int, optional (default: 0) + Maximum time lag. This may be used to make sure that estimates for + different lags in X, Z, all have the same sample size. + + Returns: + score : float + Model score. + """ + return self.gauss_pr._get_model_selection_criterion(j, parents, tau_max)
+ +
[docs] def get_dependence_measure(self, array, xyz): + """Return GPDC measure. + + Estimated as the distance correlation of the residuals of a GP + regression. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + Returns + ------- + val : float + GPDC test statistic. + """ + + x_vals = self._get_single_residuals(array, target_var=0) + y_vals = self._get_single_residuals(array, target_var=1) + val = self._get_dcorr(np.array([x_vals, y_vals])) + return val
+ + + def _get_dcorr(self, array_resid): + """Return distance correlation coefficient. + + The variables are transformed to uniform marginals using the empirical + cumulative distribution function beforehand. Here the null distribution + is not analytically available, but can be precomputed with the function + generate_and_save_nulldists(...) which saves a \*.npz file containing + the null distribution for different sample sizes. This file can then be + supplied as null_dist_filename. + + Parameters + ---------- + array_resid : array-like + data array must be of shape (2, T) + + Returns + ------- + val : float + Distance correlation coefficient. + """ + # Remove ties before applying transformation to uniform marginals + # array_resid = self._remove_ties(array_resid, verbosity=4) + x_vals, y_vals = self._trafo2uniform(array_resid) + val = dcor.distance_correlation(x_vals, y_vals, method='AVL') + return val + +
[docs] def get_shuffle_significance(self, array, xyz, value, + return_null_dist=False): + """Returns p-value for shuffle significance test. + + For residual-based test statistics only the residuals are shuffled. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + value : number + Value of test statistic for unshuffled estimate. + + Returns + ------- + pval : float + p-value + """ + + x_vals = self._get_single_residuals(array, target_var=0) + y_vals = self._get_single_residuals(array, target_var=1) + array_resid = np.array([x_vals, y_vals]) + xyz_resid = np.array([0, 1]) + + null_dist = self._get_shuffle_dist(array_resid, xyz_resid, + self.get_dependence_measure, + sig_samples=self.sig_samples, + sig_blocklength=self.sig_blocklength, + verbosity=self.verbosity) + + pval = (null_dist >= value).mean() + + if return_null_dist: + return pval, null_dist + return pval
+ +
[docs] def get_analytic_significance(self, value, T, dim): + """Returns p-value for the distance correlation coefficient. + + The null distribution for necessary degrees of freedom (df) is loaded. + If not available, the null distribution is generated with the function + generate_nulldist(). It is recommended to generate the nulldists for a + wide range of sample sizes beforehand with the function + generate_and_save_nulldists(...). The distance correlation coefficient + is one-sided. If the degrees of freedom are less than 1, numpy.nan is + returned. + + Parameters + ---------- + value : float + Test statistic value. + + T : int + Sample length + + dim : int + Dimensionality, ie, number of features. + + Returns + ------- + pval : float or numpy.nan + p-value. + """ + + # GP regression approximately doesn't cost degrees of freedom + df = T + + if df < 1: + pval = np.nan + else: + # idx_near = (np.abs(self.sample_sizes - df)).argmin() + if int(df) not in list(self.gauss_pr.null_dists): + # if np.abs(self.sample_sizes[idx_near] - df) / float(df) > 0.01: + if self.verbosity > 0: + print("Null distribution for GPDC not available " + "for deg. of freed. = %d." % df) + self.generate_nulldist(df) + null_dist_here = self.gauss_pr.null_dists[int(df)] + pval = np.mean(null_dist_here > np.abs(value)) + return pval
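A hedged sketch of the precompute-then-load workflow for the null distribution described above; the import path, the 'analytic' significance setting and the default number of null samples are assumptions, while generate_and_save_nulldists and null_dist_filename are the method and argument shown in this class:

# Hedged sketch of the precomputed-null-distribution workflow (assumed import
# path and significance setting; generate_and_save_nulldists and
# null_dist_filename appear in the class above).
from tigramite.independence_tests import GPDC

# One-off precomputation for the sample sizes expected in the analysis
gpdc = GPDC(significance='analytic')
gpdc.generate_and_save_nulldists(sample_sizes=range(100, 501, 100),
                                 null_dist_filename='gpdc_nulldists.npz')

# Later runs load the saved distributions instead of generating them at runtime
gpdc = GPDC(significance='analytic', null_dist_filename='gpdc_nulldists.npz')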
+ +
+ +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/_build/html/_modules/tigramite/independence_tests/gpdc_torch.html b/docs/_build/html/_modules/tigramite/independence_tests/gpdc_torch.html new file mode 100644 index 00000000..770669ac --- /dev/null +++ b/docs/_build/html/_modules/tigramite/independence_tests/gpdc_torch.html @@ -0,0 +1,896 @@ + + + + + + + tigramite.independence_tests.gpdc_torch — Tigramite 5.0 documentation + + + + + + + + + + + + + +
+
+
+
+ +

Source code for tigramite.independence_tests.gpdc_torch

+"""Tigramite causal discovery for time series."""
+
+# Author: Jakob Runge <jakob@jakob-runge.com>
+#
+# License: GNU General Public License v3.0
+
+from __future__ import print_function
+import warnings
+import numpy as np
+import gc
+from .independence_tests_base import CondIndTest
+try:
+    import dcor
+    import torch
+    import gpytorch
+    from .LBFGS import FullBatchLBFGS
+except Exception as e:
+    warnings.warn(str(e))
+
+class GaussProcRegTorch():
+    r"""Gaussian processes abstract base class.
+
+    GP is estimated with gpytorch. Note that the kernel's hyperparameters are
+    optimized during fitting.
+
+    The null distribution is not analytically available, but it can be
+    precomputed with the function generate_and_save_nulldists(...), which saves
+    a \*.npz file containing the null distribution for different sample sizes.
+    This file can then be supplied as null_dist_filename.
+
+    Parameters
+    ----------
+    null_samples : int
+        Number of null samples to use
+
+    cond_ind_test : CondIndTest
+        Conditional independence test that this Gaussian Process Regressor will
+        calculate the null distribution for.  This is used to grab the
+        get_dependence_measure function.
+
+    null_dist_filename : str, optional (default: None)
+        Path to file containing null distribution.
+
+    verbosity : int, optional (default: 0)
+        Level of verbosity.
+    """
+
+    def __init__(self,
+                 null_samples,
+                 cond_ind_test,
+                 null_dist_filename=None,
+                 verbosity=0):
+        # Set the dependence measure function
+        self.cond_ind_test = cond_ind_test
+        # Set member variables
+        self.verbosity = verbosity
+        # Set the null distribution defaults
+        self.null_samples = null_samples
+        self.null_dists = {}
+        self.null_dist_filename = null_dist_filename
+        # Check if we are loading a null distribution from a cached file
+        if self.null_dist_filename is not None:
+            self.null_dists, self.null_samples = \
+                self._load_nulldist(self.null_dist_filename)
+        # Size for batching
+        self.checkpoint_size = None
+
+    def _load_nulldist(self, filename):
+        r"""
+        Load a precomputed null distribution from a \*.npz file.  This
+        distribution can be calculated using generate_and_save_nulldists(...).
+
+        Parameters
+        ----------
+        filename : str
+            Path to the \*.npz file
+
+        Returns
+        -------
+        null_dists, null_samples : dict, int
+            The null distribution as a dictionary of distributions keyed by
+            sample size, the number of null samples in total.
+        """
+        null_dist_file = np.load(filename)
+        null_dists = dict(zip(null_dist_file['T'],
+                              null_dist_file['exact_dist']))
+        null_samples = len(null_dist_file['exact_dist'][0])
+        return null_dists, null_samples
+
+    def _generate_nulldist(self, df,
+                           add_to_null_dists=True):
+        """Generates null distribution for pairwise independence tests.
+
+        Generates the null distribution for sample size df. Assumes pairwise
+        samples transformed to uniform marginals. Uses get_dependence_measure
+        available in class and generates self.sig_samples random samples. Adds
+        the null distributions to self.null_dists.
+
+        Parameters
+        ----------
+        df : int
+            Degrees of freedom / sample size to generate null distribution for.
+        add_to_null_dists : bool, optional (default: True)
+            Whether to add the null dist to the dictionary of null dists or
+            just return it.
+
+        Returns
+        -------
+        null_dist : array of shape [df,]
+            Only returned, if add_to_null_dists is False.
+        """
+
+        if self.verbosity > 0:
+            print("Generating null distribution for df = %d. " % df)
+            if add_to_null_dists:
+                print("For faster computations, run function "
+                      "generate_and_save_nulldists(...) to "
+                      "precompute null distribution and load *.npz file with "
+                      "argument null_dist_filename")
+
+        xyz = np.array([0, 1])
+
+        null_dist = np.zeros(self.null_samples)
+        for i in range(self.null_samples):
+            array = self.cond_ind_test.random_state.random((2, df))
+            null_dist[i] = self.cond_ind_test.get_dependence_measure(
+                array, xyz)
+
+        null_dist.sort()
+        if add_to_null_dists:
+            self.null_dists[df] = null_dist
+        return null_dist
+
+    def _generate_and_save_nulldists(self, sample_sizes, null_dist_filename):
+        """Generates and saves null distribution for pairwise independence
+        tests.
+
+        Generates the null distribution for different sample sizes. Calls
+        generate_nulldist. Null dists are saved to disk as
+        self.null_dist_filename.npz. Also adds the null distributions to
+        self.null_dists.
+
+        Parameters
+        ----------
+        sample_sizes : list
+            List of sample sizes.
+
+        null_dist_filename : str
+            Name to save file containing null distributions.
+        """
+
+        self.null_dist_filename = null_dist_filename
+
+        null_dists = np.zeros((len(sample_sizes), self.null_samples))
+
+        for iT, T in enumerate(sample_sizes):
+            null_dists[iT] = self._generate_nulldist(
+                T, add_to_null_dists=False)
+            self.null_dists[T] = null_dists[iT]
+
+        np.savez("%s" % null_dist_filename,
+                 exact_dist=null_dists,
+                 T=np.array(sample_sizes))
+
+
+    def _get_single_residuals(self, array, target_var,
+                                    return_means=False,
+                                    standardize=True,
+                                    return_likelihood=False,
+                                    training_iter=50,
+                                    lr=0.1):
+        """Returns residuals of Gaussian process regression.
+
+        Performs a GP regression of the variable indexed by target_var on the
+        conditions Z. Here array is assumed to contain X and Y as the first two
+        rows with the remaining rows (if present) containing the conditions Z.
+        Optionally returns the estimated mean and the likelihood.
+
+        Parameters
+        ----------
+        array : array-like
+            data array with X, Y, Z in rows and observations in columns
+
+        target_var : {0, 1}
+            Variable to regress out conditions from.
+
+        standardize : bool, optional (default: True)
+            Whether to standardize the array beforehand.
+
+        return_means : bool, optional (default: False)
+            Whether to return the estimated regression line.
+
+        return_likelihood : bool, optional (default: False)
+            Whether to return the log_marginal_likelihood of the fitted GP.
+
+        training_iter : int, optional (default: 50)
+            Number of training iterations.
+
+        lr : float, optional (default: 0.1)
+            Learning rate (default: 0.1).
+
+        Returns
+        -------
+        resid [, mean, likelihood] : array-like
+            The residual of the regression and optionally the estimated mean
+            and/or the likelihood.
+        """
+
+        dim, T = array.shape
+
+        if dim <= 2:
+            if return_likelihood:
+                return array[target_var, :], -np.inf
+            return array[target_var, :]
+
+        # Implement using PyTorch
+        # Standardize
+        if standardize:
+            array -= array.mean(axis=1).reshape(dim, 1)
+            array /= array.std(axis=1).reshape(dim, 1)
+            if np.isnan(array).any():
+                raise ValueError("Nans after standardizing, "
+                                 "possibly constant array!")
+
+        target_series = array[target_var, :]
+        z = np.fastCopyAndTranspose(array[2:])
+        if np.ndim(z) == 1:
+            z = z.reshape(-1, 1)
+
+        train_x = torch.tensor(z).float()
+        train_y = torch.tensor(target_series).float()
+
+        device_type = 'cuda' if torch.cuda.is_available() else 'cpu'
+        output_device = torch.device(device_type)
+        train_x, train_y = train_x.to(output_device), train_y.to(output_device)
+
+        if device_type == 'cuda':
+            # If GPU is available, use MultiGPU with Kernel Partitioning
+            n_devices = torch.cuda.device_count()
+            class mExactGPModel(gpytorch.models.ExactGP):
+                def __init__(self, train_x, train_y, likelihood, n_devices):
+                    super(mExactGPModel, self).__init__(train_x, train_y, likelihood)
+                    self.mean_module = gpytorch.means.ConstantMean()
+                    base_covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
+
+                    self.covar_module = gpytorch.kernels.MultiDeviceKernel(
+                        base_covar_module, device_ids=range(n_devices),
+                        output_device=output_device
+                    )
+
+                def forward(self, x):
+                    mean_x = self.mean_module(x)
+                    covar_x = self.covar_module(x)
+                    return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
+
+            def mtrain(train_x,
+                      train_y,
+                      n_devices,
+                      output_device,
+                      checkpoint_size,
+                      preconditioner_size,
+                      n_training_iter,
+                      ):
+                likelihood = gpytorch.likelihoods.GaussianLikelihood().to(output_device)
+                model = mExactGPModel(train_x, train_y, likelihood, n_devices).to(output_device)
+                model.train()
+                likelihood.train()
+
+                optimizer = FullBatchLBFGS(model.parameters(), lr=lr)
+                # "Loss" for GPs - the marginal log likelihood
+                mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
+
+                with gpytorch.beta_features.checkpoint_kernel(checkpoint_size), \
+                        gpytorch.settings.max_preconditioner_size(preconditioner_size):
+
+                    def closure():
+                        optimizer.zero_grad()
+                        output = model(train_x)
+                        loss = -mll(output, train_y)
+                        return loss
+
+                    loss = closure()
+                    loss.backward()
+
+                    for i in range(n_training_iter):
+                        options = {'closure': closure, 'current_loss': loss, 'max_ls': 10}
+                        loss, _, _, _, _, _, _, fail = optimizer.step(options)
+
+                        '''print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
+                            i + 1, n_training_iter, loss.item(),
+                            model.covar_module.module.base_kernel.lengthscale.item(),
+                            model.likelihood.noise.item()
+                        ))'''
+
+                        if fail:
+                            # print('Convergence reached!')
+                            break
+
+                return model, likelihood, mll
+
+            def find_best_gpu_setting(train_x,
+                                      train_y,
+                                      n_devices,
+                                      output_device,
+                                      preconditioner_size
+                                      ):
+                N = train_x.size(0)
+
+                # Find the optimum partition/checkpoint size by decreasing in powers of 2
+                # Start with no partitioning (size = 0)
+                settings = [0] + [int(n) for n in np.ceil(N / 2 ** np.arange(1, np.floor(np.log2(N))))]
+
+                for checkpoint_size in settings:
+                    print('Number of devices: {} -- Kernel partition size: {}'.format(n_devices, checkpoint_size))
+                    try:
+                        # Try a full forward and backward pass with this setting to check memory usage
+                        _, _, _ = mtrain(train_x, train_y,
+                                     n_devices=n_devices, output_device=output_device,
+                                     checkpoint_size=checkpoint_size,
+                                     preconditioner_size=preconditioner_size, n_training_iter=1)
+
+                        # when successful, break out of for-loop and jump to finally block
+                        break
+                    except RuntimeError as e:
+                        pass
+                    except AttributeError as e:
+                        pass
+                    finally:
+                        # handle CUDA OOM error
+                        gc.collect()
+                        torch.cuda.empty_cache()
+                return checkpoint_size
+
+            # Set a large enough preconditioner size to reduce the number of CG iterations run
+            preconditioner_size = 100
+            if self.checkpoint_size is None:
+                self.checkpoint_size = find_best_gpu_setting(train_x, train_y,
+                                                        n_devices=n_devices,
+                                                        output_device=output_device,
+                                                        preconditioner_size=preconditioner_size)
+
+            model, likelihood, mll = mtrain(train_x, train_y,
+                                      n_devices=n_devices, output_device=output_device,
+                                      checkpoint_size=self.checkpoint_size,
+                                      preconditioner_size=100,
+                                      n_training_iter=training_iter)
+
+            # Get into evaluation (predictive posterior) mode
+            model.eval()
+            likelihood.eval()
+
+            # Make predictions by feeding model through likelihood
+            with torch.no_grad(), gpytorch.settings.fast_pred_var(), gpytorch.beta_features.checkpoint_kernel(1000):
+                mean = model(train_x).loc.detach()
+                loglik = mll(model(train_x), train_y)*T
+
+            resid = (train_y - mean).detach().cpu().numpy()
+            mean = mean.detach().cpu().numpy()
+
+        else:
+            # If only CPU is available, we will use the simplest form of GP model, exact inference
+            class ExactGPModel(gpytorch.models.ExactGP):
+                def __init__(self, train_x, train_y, likelihood):
+                    super(ExactGPModel, self).__init__(
+                        train_x, train_y, likelihood)
+                    self.mean_module = gpytorch.means.ConstantMean()
+
+                    # We only use the RBF kernel here; the WhiteNoiseKernel is
+                    # deprecated and its functionality is integrated into the
+                    # likelihood module.
+                    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
+
+                def forward(self, x):
+                    mean_x = self.mean_module(x)
+                    covar_x = self.covar_module(x)
+                    return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
+
+            # initialize likelihood and model
+            likelihood = gpytorch.likelihoods.GaussianLikelihood()
+            model = ExactGPModel(train_x, train_y, likelihood)
+
+            # Find optimal model hyperparameters
+            model.train()
+            likelihood.train()
+
+            # Use the adam optimizer
+            # Includes GaussianLikelihood parameters
+            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
+
+            # "Loss" for GPs - the marginal log likelihood
+            mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
+
+            for i in range(training_iter):
+                # Zero gradients from previous iteration
+                optimizer.zero_grad()
+                # Output from model
+                output = model(train_x)
+
+                # Calc loss and backprop gradients
+                loss = -mll(output, train_y)
+                loss.backward()
+                optimizer.step()
+
+            # Get into evaluation (predictive posterior) mode
+            model.eval()
+            likelihood.eval()
+
+            # Make predictions by feeding model through likelihood
+            with torch.no_grad(), gpytorch.settings.fast_pred_var():
+                mean = model(train_x).loc.detach()
+                loglik = mll(model(train_x), train_y) * T
+
+            resid = (train_y - mean).detach().numpy()
+            mean = mean.detach().numpy()
+
+        if return_means and not return_likelihood:
+            return resid, mean
+        elif return_likelihood and not return_means:
+            return resid, loglik
+        elif return_means and return_likelihood:
+            return resid, mean, loglik
+        return resid
+
+    def _get_model_selection_criterion(self, j, parents, tau_max=0):
+        """Returns log marginal likelihood for GP regression.
+
+        Fits a GP model of the parents to variable j and returns the negative
+        log marginal likelihood as a model selection score. Is used to determine
+        optimal hyperparameters in PCMCI, in particular the pc_alpha value.
+
+        Parameters
+        ----------
+        j : int
+            Index of target variable in data array.
+
+        parents : list
+            List of form [(0, -1), (3, -2), ...] containing parents.
+
+        tau_max : int, optional (default: 0)
+            Maximum time lag. This may be used to make sure that estimates for
+            different lags in X, Z, all have the same sample size.
+
+        Returns:
+        score : float
+            Model score.
+        """
+
+        Y = [(j, 0)]
+        X = [(j, 0)]   # dummy variable here
+        Z = parents
+        array, xyz = \
+            self.cond_ind_test.dataframe.construct_array(
+                X=X, Y=Y, Z=Z,
+                tau_max=tau_max,
+                mask_type=self.cond_ind_test.mask_type,
+                return_cleaned_xyz=False,
+                do_checks=True,
+                verbosity=self.verbosity)
+
+        dim, T = array.shape
+
+        _, logli = self._get_single_residuals(array,
+                                              target_var=1,
+                                              return_likelihood=True)
+
+        score = -logli
+        return score
+
+
+
[docs]class GPDCtorch(CondIndTest):
+    r"""GPDC conditional independence test based on Gaussian processes and
+        distance correlation. Here with gpytorch implementation.
+
+    GPDC is based on a Gaussian process (GP) regression and a distance
+    correlation test on the residuals [2]_. GP is estimated with gpytorch.
+    The distance correlation test is implemented with the dcor package
+    available from pip. Here the null distribution is not analytically
+    available, but can be precomputed with the function
+    generate_and_save_nulldists(...), which saves a \*.npz file containing the
+    null distribution for different sample sizes. This file can then be
+    supplied as null_dist_filename.
+
+    Notes
+    -----
+
+    GPDC is based on a Gaussian process (GP) regression and a distance
+    correlation test on the residuals. Distance correlation is described in
+    [2]_. To test :math:`X \perp Y | Z`, first :math:`Z` is regressed out from
+    :math:`X` and :math:`Y` assuming the model
+
+    .. math::  X & =  f_X(Z) + \epsilon_{X} \\
+        Y & =  f_Y(Z) + \epsilon_{Y}  \\
+        \epsilon_{X,Y} &\sim \mathcal{N}(0, \sigma^2)
+
+    using GP regression. Here :math:`\sigma^2` and the kernel bandwidth are
+    optimized using ``gpytorch``. Then the residuals are transformed to uniform
+    marginals yielding :math:`r_X,r_Y` and their dependency is tested with
+
+    .. math::  \mathcal{R}\left(r_X, r_Y\right)
+
+    The null distribution of the distance correlation should be pre-computed.
+    Otherwise it is computed during runtime.
+
+    Parameters
+    ----------
+    null_dist_filename : str, optional (default: None)
+        Path to file containing null distribution.
+
+    **kwargs :
+        Arguments passed on to parent class GaussProcRegTorch.
+
+    """
+    @property
+    def measure(self):
+        """
+        Concrete property to return the measure of the independence test
+        """
+        return self._measure
+
+    def __init__(self,
+                 null_dist_filename=None,
+                 **kwargs):
+        self._measure = 'gp_dc'
+        self.two_sided = False
+        self.residual_based = True
+        # Call the parent constructor
+        CondIndTest.__init__(self, **kwargs)
+        # Build the regressor
+        self.gauss_pr = GaussProcRegTorch(self.sig_samples,
+                                          self,
+                                          null_dist_filename=null_dist_filename,
+                                          verbosity=self.verbosity)
+
+        if self.verbosity > 0:
+            print("null_dist_filename = %s" % self.gauss_pr.null_dist_filename)
+            print("")
+
+    def _load_nulldist(self, filename):
+        r"""
+        Load a precomputed null distribution from a \*.npz file. This
+        distribution can be calculated using generate_and_save_nulldists(...).
+
+        Parameters
+        ----------
+        filename : str
+            Path to the \*.npz file
+
+        Returns
+        -------
+        null_dists, null_samples : dict, int
+            The null distribution as a dictionary of distributions keyed by
+            sample size, the number of null samples in total.
+        """
+        return self.gauss_pr._load_nulldist(filename)
[docs] def generate_nulldist(self, df, add_to_null_dists=True): + """Generates null distribution for pairwise independence tests. + + Generates the null distribution for sample size df. Assumes pairwise + samples transformed to uniform marginals. Uses get_dependence_measure + available in class and generates self.sig_samples random samples. Adds + the null distributions to self.gauss_pr.null_dists. + + Parameters + ---------- + df : int + Degrees of freedom / sample size to generate null distribution for. + + add_to_null_dists : bool, optional (default: True) + Whether to add the null dist to the dictionary of null dists or + just return it. + + Returns + ------- + null_dist : array of shape [df,] + Only returned,if add_to_null_dists is False. + """ + return self.gauss_pr._generate_nulldist(df, add_to_null_dists)
+ +
[docs] def generate_and_save_nulldists(self, sample_sizes, null_dist_filename): + """Generates and saves null distribution for pairwise independence + tests. + + Generates the null distribution for different sample sizes. Calls + generate_nulldist. Null dists are saved to disk as + self.null_dist_filename.npz. Also adds the null distributions to + self.gauss_pr.null_dists. + + Parameters + ---------- + sample_sizes : list + List of sample sizes. + + null_dist_filename : str + Name to save file containing null distributions. + """ + self.gauss_pr._generate_and_save_nulldists(sample_sizes, + null_dist_filename)
+ + + def _get_single_residuals(self, array, target_var, + return_means=False, + standardize=True, + return_likelihood=False, + training_iter=50, + lr=0.1): + """Returns residuals of Gaussian process regression. + + Performs a GP regression of the variable indexed by target_var on the + conditions Z. Here array is assumed to contain X and Y as the first two + rows with the remaining rows (if present) containing the conditions Z. + Optionally returns the estimated mean and the likelihood. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + target_var : {0, 1} + Variable to regress out conditions from. + + standardize : bool, optional (default: True) + Whether to standardize the array beforehand. + + return_means : bool, optional (default: False) + Whether to return the estimated regression line. + + return_likelihood : bool, optional (default: False) + Whether to return the log_marginal_likelihood of the fitted GP + + training_iter : int, optional (default: 50) + Number of training iterations. + + lr : float, optional (default: 0.1) + Learning rate (default: 0.1). + + Returns + ------- + resid [, mean, likelihood] : array-like + The residual of the regression and optionally the estimated mean + and/or the likelihood. + """ + return self.gauss_pr._get_single_residuals( + array, target_var, + return_means, + standardize, + return_likelihood, + training_iter, + lr) + +
[docs] def get_model_selection_criterion(self, j, parents, tau_max=0): + """Returns log marginal likelihood for GP regression. + + Fits a GP model of the parents to variable j and returns the negative + log marginal likelihood as a model selection score. It is used to determine + the optimal hyperparameters in PCMCI, in particular the pc_alpha value. + + Parameters + ---------- + j : int + Index of target variable in data array. + + parents : list + List of form [(0, -1), (3, -2), ...] containing parents. + + tau_max : int, optional (default: 0) + Maximum time lag. This may be used to make sure that estimates for + different lags in X, Z, all have the same sample size. + + Returns + ------- + score : float + Model score. + """ + return self.gauss_pr._get_model_selection_criterion(j, parents, tau_max)
+ +
[docs] def get_dependence_measure(self, array, xyz): + """Return GPDC measure. + + Estimated as the distance correlation of the residuals of a GP + regression. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + Returns + ------- + val : float + GPDC test statistic. + """ + + x_vals = self._get_single_residuals(array, target_var=0) + y_vals = self._get_single_residuals(array, target_var=1) + val = self._get_dcorr(np.array([x_vals, y_vals])) + return val
+ + def _get_dcorr(self, array_resid): + """Return distance correlation coefficient. + + The variables are transformed to uniform marginals using the empirical + cumulative distribution function beforehand. Here the null distribution + is not analytically available, but can be precomputed with the function + generate_and_save_nulldists(...) which saves a \*.npz file containing + the null distribution for different sample sizes. This file can then be + supplied as null_dist_filename. + + Parameters + ---------- + array_resid : array-like + data array must be of shape (2, T) + + Returns + ------- + val : float + Distance correlation coefficient. + """ + # Remove ties before applying transformation to uniform marginals + # array_resid = self._remove_ties(array_resid, verbosity=4) + x_vals, y_vals = self._trafo2uniform(array_resid) + val = dcor.distance_correlation(x_vals, y_vals, method='AVL') + return val + +
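For reference, a standalone sketch of the distance-correlation statistic computed above (not tigramite code; it assumes the dcor package is installed):

    import numpy as np
    import dcor

    rng = np.random.default_rng(0)
    r_x = rng.normal(size=200)
    r_y = 0.5 * r_x + rng.normal(size=200)
    # Distance correlation lies in [0, 1] and (in population) is zero only
    # under independence.
    val = dcor.distance_correlation(r_x, r_y)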
[docs] def get_shuffle_significance(self, array, xyz, value, + return_null_dist=False): + """Returns p-value for shuffle significance test. + + For residual-based test statistics only the residuals are shuffled. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + value : number + Value of test statistic for unshuffled estimate. + + Returns + ------- + pval : float + p-value + """ + + x_vals = self._get_single_residuals(array, target_var=0) + y_vals = self._get_single_residuals(array, target_var=1) + array_resid = np.array([x_vals, y_vals]) + xyz_resid = np.array([0, 1]) + + null_dist = self._get_shuffle_dist(array_resid, xyz_resid, + self.get_dependence_measure, + sig_samples=self.sig_samples, + sig_blocklength=self.sig_blocklength, + verbosity=self.verbosity) + + pval = (null_dist >= value).mean() + + if return_null_dist: + return pval, null_dist + return pval
+ +
[docs] def get_analytic_significance(self, value, T, dim): + """Returns p-value for the distance correlation coefficient. + + The null distribution for necessary degrees of freedom (df) is loaded. + If not available, the null distribution is generated with the function + generate_nulldist(). It is recommended to generate the nulldists for a + wide range of sample sizes beforehand with the function + generate_and_save_nulldists(...). The distance correlation coefficient + is one-sided. If the degrees of freedom are less than 1, numpy.nan is + returned. + + Parameters + ---------- + value : float + Test statistic value. + + T : int + Sample length + + dim : int + Dimensionality, ie, number of features. + + Returns + ------- + pval : float or numpy.nan + p-value. + """ + + # GP regression approximately doesn't cost degrees of freedom + df = T + + if df < 1: + pval = np.nan + else: + # idx_near = (np.abs(self.sample_sizes - df)).argmin() + if int(df) not in list(self.gauss_pr.null_dists): + # if np.abs(self.sample_sizes[idx_near] - df) / float(df) > 0.01: + if self.verbosity > 0: + print("Null distribution for GPDC not available " + "for deg. of freed. = %d." % df) + self.generate_nulldist(df) + null_dist_here = self.gauss_pr.null_dists[int(df)] + pval = np.mean(null_dist_here > np.abs(value)) + return pval
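Toy illustration of the lookup above (numbers hypothetical): the p-value is simply the fraction of precomputed null statistics exceeding the observed absolute value:

    import numpy as np

    null_dist = np.array([0.01, 0.02, 0.05, 0.08, 0.20])  # null dist for df samples
    value = 0.07
    pval = np.mean(null_dist > np.abs(value))              # -> 0.4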
+ +
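A hedged end-to-end sketch of the test on raw arrays (it assumes GPDCtorch inherits run_test_raw from the CondIndTest base class below and that the import path matches the package layout; missing null distributions are generated at runtime, which can be slow):

    import numpy as np
    from tigramite.independence_tests import GPDCtorch

    rng = np.random.default_rng(42)
    z = rng.normal(size=(500, 1))
    x = z**2 + 0.3 * rng.normal(size=(500, 1))
    y = z**2 + 0.3 * rng.normal(size=(500, 1))

    gpdc = GPDCtorch(significance='analytic')
    val, pval = gpdc.run_test_raw(x, y, z)  # X _|_ Y | Z should not be rejected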
+ +
+
+
+
+
\ No newline at end of file
diff --git a/docs/_build/html/_modules/tigramite/independence_tests/independence_tests_base.html b/docs/_build/html/_modules/tigramite/independence_tests/independence_tests_base.html
new file mode 100644
index 00000000..22884d74
--- /dev/null
+++ b/docs/_build/html/_modules/tigramite/independence_tests/independence_tests_base.html
@@ -0,0 +1,1100 @@
+ tigramite.independence_tests.independence_tests_base — Tigramite 5.0 documentation
+
+
+
+ +
Source code for tigramite.independence_tests.independence_tests_base
+"""Tigramite causal discovery for time series."""
+
+# Author: Jakob Runge <jakob@jakob-runge.com>
+#
+# License: GNU General Public License v3.0
+
+from __future__ import print_function
+import warnings
+import math
+import abc
+import numpy as np
+import six
+from hashlib import sha1
+
+
+
[docs]@six.add_metaclass(abc.ABCMeta) +class CondIndTest(): + """Base class of conditional independence tests. + + Provides useful general functions for different independence tests such as + shuffle significance testing and bootstrap confidence estimation. Also + handles masked samples. Other test classes can inherit from this class. + + Parameters + ---------- + seed : int, optional(default = 42) + Seed for RandomState (default_rng) + + mask_type : str, optional (default = None) + Must be in {None, 'y','x','z','xy','xz','yz','xyz'} + Masking mode: Indicators for which variables in the dependence measure + I(X; Y | Z) the samples should be masked. If None, the mask is not used. + Explained in tutorial on masking and missing values. + + significance : str, optional (default: 'analytic') + Type of significance test to use. In this package 'analytic', + 'fixed_thres' and 'shuffle_test' are available. + + fixed_thres : float, optional (default: 0.1) + If significance is 'fixed_thres', this specifies the threshold for the + absolute value of the dependence measure. + + sig_samples : int, optional (default: 1000) + Number of samples for shuffle significance test. + + sig_blocklength : int, optional (default: None) + Block length for block-shuffle significance test. If None, the + block length is determined from the decay of the autocovariance as + explained in [1]_. + + confidence : str, optional (default: None) + Specify type of confidence estimation. If False, numpy.nan is returned. + 'bootstrap' can be used with any test, for ParCorr also 'analytic' is + implemented. + + conf_lev : float, optional (default: 0.9) + Two-sided confidence interval. + + conf_samples : int, optional (default: 100) + Number of samples for bootstrap. + + conf_blocklength : int, optional (default: None) + Block length for block-bootstrap. If None, the block length is + determined from the decay of the autocovariance as explained in [1]_. + + recycle_residuals : bool, optional (default: False) + Specifies whether residuals should be stored. This may be faster, but + can cost considerable memory. + + verbosity : int, optional (default: 0) + Level of verbosity. + """ +
[docs] @abc.abstractmethod + def get_dependence_measure(self, array, xyz): + """ + Abstract function that all concrete classes must instantiate. + """ + pass
+ + @abc.abstractproperty + def measure(self): + """ + Abstract property to store the type of independence test. + """ + pass + + def __init__(self, + seed=42, + mask_type=None, + significance='analytic', + fixed_thres=0.1, + sig_samples=1000, + sig_blocklength=None, + confidence=None, + conf_lev=0.9, + conf_samples=100, + conf_blocklength=None, + recycle_residuals=False, + verbosity=0): + # Set the dataframe to None for now, will be reset during pcmci call + self.dataframe = None + # Set the options + self.random_state = np.random.default_rng(seed) + self.significance = significance + self.sig_samples = sig_samples + self.sig_blocklength = sig_blocklength + self.fixed_thres = fixed_thres + self.verbosity = verbosity + self.cached_ci_results = {} + # If we recycle residuals, then set up a residual cache + self.recycle_residuals = recycle_residuals + if self.recycle_residuals: + self.residuals = {} + # If we use a mask, we cannot recycle residuals + self.set_mask_type(mask_type) + + # Set the confidence type and details + self.confidence = confidence + self.conf_lev = conf_lev + self.conf_samples = conf_samples + self.conf_blocklength = conf_blocklength + + # Print information about the + if self.verbosity > 0: + self.print_info() + +
[docs] def set_mask_type(self, mask_type): + """ + Setter for mask type to ensure that this option does not clash with + recycle_residuals. + + Parameters + ---------- + mask_type : str + Must be in {None, 'y','x','z','xy','xz','yz','xyz'} + Masking mode: Indicators for which variables in the dependence measure + I(X; Y | Z) the samples should be masked. If None, the mask is not used. + Explained in tutorial on masking and missing values. + """ + # Set the mask type + self.mask_type = mask_type + # Check if this clashes with residual recycling + if self.mask_type is not None: + if self.recycle_residuals is True: + warnings.warn("Using a mask disables recycling residuals.") + self.recycle_residuals = False + # Check the mask type is keyed correctly + self._check_mask_type()
+ +
[docs] def print_info(self): + """ + Print information about the conditional independence test parameters + """ + info_str = "\n# Initialize conditional independence test\n\nParameters:" + info_str += "\nindependence test = %s" % self.measure + info_str += "\nsignificance = %s" % self.significance + # Check if we are using a shuffle test + if self.significance == 'shuffle_test': + info_str += "\nsig_samples = %s" % self.sig_samples + info_str += "\nsig_blocklength = %s" % self.sig_blocklength + # Check if we are using a fixed threshold + elif self.significance == 'fixed_thres': + info_str += "\nfixed_thres = %s" % self.fixed_thres + # Check if we have a confidence type + if self.confidence: + info_str += "\nconfidence = %s" % self.confidence + info_str += "\nconf_lev = %s" % self.conf_lev + # Check if this confidence type is boostrapping + if self.confidence == 'bootstrap': + info_str += "\nconf_samples = %s" % self.conf_samples + info_str += "\nconf_blocklength = %s" %self.conf_blocklength + # Check if we use a non-trivial mask type + if self.mask_type is not None: + info_str += "\nmask_type = %s" % self.mask_type + # Check if we are recycling residuals or not + if self.recycle_residuals: + info_str += "\nrecycle_residuals = %s" % self.recycle_residuals + # Print the information string + print(info_str)
+ + def _check_mask_type(self): + """ + mask_type : str, optional (default = None) + Must be in {None, 'y','x','z','xy','xz','yz','xyz'} + Masking mode: Indicators for which variables in the dependence measure + I(X; Y | Z) the samples should be masked. If None, the mask is not used. + Explained in tutorial on masking and missing values. + """ + if self.mask_type is not None: + mask_set = set(self.mask_type) - set(['x', 'y', 'z']) + if mask_set: + err_msg = "mask_type = %s," % self.mask_type + " but must be" +\ + " list containing 'x','y','z', or any combination" + raise ValueError(err_msg) + + +
[docs] def get_analytic_confidence(self, value, df, conf_lev): + """ + Base class assumption that this is not implemented. Concrete classes + should override when possible. + """ + raise NotImplementedError("Analytic confidence not"+\ + " implemented for %s" % self.measure)
+ +
[docs] def get_model_selection_criterion(self, j, parents, tau_max=0): + """ + Base class assumption that this is not implemented. Concrete classes + should override when possible. + """ + raise NotImplementedError("Model selection not"+\ + " implemented for %s" % self.measure)
+ +
[docs] def get_analytic_significance(self, value, T, dim): + """ + Base class assumption that this is not implemented. Concrete classes + should override when possible. + """ + raise NotImplementedError("Analytic significance not"+\ + " implemented for %s" % self.measure)
+ +
[docs] def get_shuffle_significance(self, array, xyz, value, + return_null_dist=False): + """ + Base class assumption that this is not implemented. Concrete classes + should override when possible. + """ + raise NotImplementedError("Shuffle significance not"+\ + " implemented for %s" % self.measure)
+ + def _get_single_residuals(self, array, target_var, + standardize=True, return_means=False): + """ + Base class assumption that this is not implemented. Concrete classes + should override when possible. + """ + raise NotImplementedError("Residual calculation not"+\ + " implemented for %s" % self.measure) + +
[docs] def set_dataframe(self, dataframe): + """Initialize and check the dataframe. + + Parameters + ---------- + dataframe : data object + Set tigramite dataframe object. It must have the attributes + dataframe.values yielding a numpy array of shape (observations T, + variables N) and optionally a mask of the same shape and a missing + values flag. + + """ + self.dataframe = dataframe + if self.mask_type is not None: + dataframe._check_mask(require_mask=True)
+ + def _keyfy(self, x, z): + """Helper function to make lists unique.""" + return (tuple(set(x)), tuple(set(z))) + + def _get_array(self, X, Y, Z, tau_max=0, cut_off='2xtau_max', + verbosity=0): + """Convencience wrapper around construct_array.""" + + if self.measure in ['par_corr']: + if len(X) > 1 or len(Y) > 1: + raise ValueError("X and Y for %s must be univariate." % + self.measure) + # Call the wrapped function + return self.dataframe.construct_array(X=X, Y=Y, Z=Z, + tau_max=tau_max, + mask_type=self.mask_type, + return_cleaned_xyz=True, + do_checks=True, + cut_off=cut_off, + verbosity=verbosity) + + def _get_array_hash(self, array, xyz, XYZ): + """Helper function to get hash of array. + + For a CI test X _|_ Y | Z the order of variables within X or Y or Z + does not matter and also the order X and Y can be swapped. + Hence, to compare hashes of the whole array, we order accordingly + to create a unique, order-independent hash. + + Parameters + ---------- + array : Data array of shape (dim, T) + Data array. + xyz : array + Identifier array of shape (dim,) identifying which row in array + corresponds to X, Y, and Z + XYZ : list of tuples + + Returns + ------- + combined_hash : str + Hash that identifies uniquely an array of XYZ + """ + + X, Y, Z = XYZ + + # First check whether CI result was already computed + # by checking whether hash of (xyz, array) already exists + # Individually sort X, Y, Z since for a CI test it does not matter + # how they are aranged + x_orderd = sorted(range(len(X)), key=X.__getitem__) + arr_x = array[xyz==0][x_orderd] + x_hash = sha1(np.ascontiguousarray(arr_x)).hexdigest() + + y_orderd = sorted(range(len(Y)), key=Y.__getitem__) + arr_y = array[xyz==1][y_orderd] + y_hash = sha1(np.ascontiguousarray(arr_y)).hexdigest() + + z_orderd = sorted(range(len(Z)), key=Z.__getitem__) + arr_z = array[xyz==2][z_orderd] + z_hash = sha1(np.ascontiguousarray(arr_z)).hexdigest() + + sorted_xy = sorted([x_hash, y_hash]) + combined_hash = (sorted_xy[0], sorted_xy[1], z_hash) + return combined_hash + + +
[docs] def run_test(self, X, Y, Z=None, tau_max=0, cut_off='2xtau_max'): + """Perform conditional independence test. + + Calls the dependence measure and signficicance test functions. The child + classes must specify a function get_dependence_measure and either or + both functions get_analytic_significance and get_shuffle_significance. + If recycle_residuals is True, also _get_single_residuals must be + available. + + Parameters + ---------- + X, Y, Z : list of tuples + X,Y,Z are of the form [(var, -tau)], where var specifies the + variable index and tau the time lag. + + tau_max : int, optional (default: 0) + Maximum time lag. This may be used to make sure that estimates for + different lags in X, Z, all have the same sample size. + + cut_off : {'2xtau_max', 'max_lag', 'max_lag_or_tau_max'} + How many samples to cutoff at the beginning. The default is + '2xtau_max', which guarantees that MCI tests are all conducted on + the same samples. For modeling, 'max_lag_or_tau_max' can be used, + which uses the maximum of tau_max and the conditions, which is + useful to compare multiple models on the same sample. Last, + 'max_lag' uses as much samples as possible. + + Returns + ------- + val, pval : Tuple of floats + The test statistic value and the p-value. + """ + + # Get the array to test on + array, xyz, XYZ = self._get_array(X, Y, Z, tau_max, cut_off) + X, Y, Z = XYZ + # Record the dimensions + dim, T = array.shape + # Ensure it is a valid array + if np.any(np.isnan(array)): + raise ValueError("nans in the array!") + + combined_hash = self._get_array_hash(array, xyz, XYZ) + + if combined_hash in self.cached_ci_results.keys(): + cached = True + val, pval = self.cached_ci_results[combined_hash] + else: + cached = False + # Get the dependence measure, reycling residuals if need be + val = self._get_dependence_measure_recycle(X, Y, Z, xyz, array) + # Get the p-value + pval = self.get_significance(val, array, xyz, T, dim) + self.cached_ci_results[combined_hash] = (val, pval) + + if self.verbosity > 1: + self._print_cond_ind_results(val=val, pval=pval, cached=cached, + conf=None) + # Return the value and the pvalue + return val, pval
+ +
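A minimal sketch of the run_test call pattern above (toy data; it assumes tigramite.data_processing.DataFrame and the GPDCtorch subclass shown earlier in this build):

    import numpy as np
    from tigramite.data_processing import DataFrame
    from tigramite.independence_tests import GPDCtorch

    data = np.random.default_rng(0).normal(size=(1000, 3))  # shape (T, N)
    ci_test = GPDCtorch(significance='shuffle_test', sig_samples=200)
    ci_test.set_dataframe(DataFrame(data))
    # Test variable 0 at lag 1 against variable 2 at lag 0,
    # conditioning on variable 1 at lag 1.
    val, pval = ci_test.run_test(X=[(0, -1)], Y=[(2, 0)], Z=[(1, -1)], tau_max=2)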
[docs] def run_test_raw(self, x, y, z=None): + """Perform conditional independence test directly on input arrays x, y, z. + + Calls the dependence measure and signficicance test functions. The child + classes must specify a function get_dependence_measure and either or + both functions get_analytic_significance and get_shuffle_significance. + + Parameters + ---------- + x, y, z : arrays + x,y,z are of the form (samples, dimension). + + Returns + ------- + val, pval : Tuple of floats + + The test statistic value and the p-value. + """ + + if np.ndim(x) != 2 or np.ndim(y) != 2: + raise ValueError("x,y must be arrays of shape (samples, dimension)" + " where dimension can be 1.") + + if z is not None and np.ndim(z) != 2: + raise ValueError("z must be array of shape (samples, dimension)" + " where dimension can be 1.") + + if z is None: + # Get the array to test on + array = np.vstack((x.T, y.T)) + + # xyz is the dimension indicator + xyz = np.array([0 for i in range(x.shape[1])] + + [1 for i in range(y.shape[1])]) + + else: + # Get the array to test on + array = np.vstack((x.T, y.T, z.T)) + + # xyz is the dimension indicator + xyz = np.array([0 for i in range(x.shape[1])] + + [1 for i in range(y.shape[1])] + + [2 for i in range(z.shape[1])]) + + # Record the dimensions + dim, T = array.shape + # Ensure it is a valid array + if np.isnan(array).sum() != 0: + raise ValueError("nans in the array!") + # Get the dependence measure + val = self.get_dependence_measure(array, xyz) + # Get the p-value + pval = self.get_significance(val, array, xyz, T, dim) + # Return the value and the pvalue + return val, pval
+ + def _get_dependence_measure_recycle(self, X, Y, Z, xyz, array): + """Get the dependence_measure, optionally recycling residuals + + If self.recycle_residuals is True, also _get_single_residuals must be + available. + + Parameters + ---------- + X, Y, Z : list of tuples + X,Y,Z are of the form [(var, -tau)], where var specifies the + variable index and tau the time lag. + + xyz : array of ints + XYZ identifier array of shape (dim,). + + array : array + Data array of shape (dim, T) + + Return + ------ + val : float + Test statistic + """ + # Check if we are recycling residuals + if self.recycle_residuals: + # Get or calculate the cached residuals + x_resid = self._get_cached_residuals(X, Z, array, 0) + y_resid = self._get_cached_residuals(Y, Z, array, 1) + # Make a new residual array + array_resid = np.array([x_resid, y_resid]) + xyz_resid = np.array([0, 1]) + # Return the dependence measure + return self.get_dependence_measure(array_resid, xyz_resid) + # If not, return the dependence measure on the array and xyz + return self.get_dependence_measure(array, xyz) + + def _get_cached_residuals(self, x_nodes, z_nodes, array, target_var): + """ + Retrieve or calculate the cached residuals for the given node sets. + + Parameters + ---------- + x_nodes : list of tuples + List of nodes, X or Y normally. Used to key the residual cache + during lookup + + z_nodes : list of tuples + List of nodes, Z normally + + target_var : int + Key to differentiate X from Y. + x_nodes == X => 0, x_nodes == Y => 1 + + array : array + Data array of shape (dim, T) + + Returns + ------- + x_resid : array + Residuals calculated by _get_single_residual + """ + # Check if we have calculated these residuals + if self._keyfy(x_nodes, z_nodes) in list(self.residuals): + x_resid = self.residuals[self._keyfy(x_nodes, z_nodes)] + # If not, calculate the residuals + else: + x_resid = self._get_single_residuals(array, target_var=target_var) + if z_nodes: + self.residuals[self._keyfy(x_nodes, z_nodes)] = x_resid + # Return these residuals + return x_resid + +
[docs] def get_significance(self, val, array, xyz, T, dim, sig_override=None): + """ + Returns the p-value from whichever significance function is specified + for this test. If an override is used, then it will call a different + function then specified by self.significance + + Parameters + ---------- + val : float + Test statistic value. + + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + T : int + Sample length + + dim : int + Dimensionality, ie, number of features. + + sig_override : string + Must be in 'analytic', 'shuffle_test', 'fixed_thres' + + Returns + ------- + pval : float or numpy.nan + P-value. + """ + # Defaults to the self.significance member value + use_sig = self.significance + if sig_override is not None: + use_sig = sig_override + # Check if we are using the analytic significance + if use_sig == 'analytic': + pval = self.get_analytic_significance(value=val, T=T, dim=dim) + # Check if we are using the shuffle significance + elif use_sig == 'shuffle_test': + pval = self.get_shuffle_significance(array=array, + xyz=xyz, + value=val) + # Check if we are using the fixed_thres significance + elif use_sig == 'fixed_thres': + pval = self.get_fixed_thres_significance( + value=val, + fixed_thres=self.fixed_thres) + else: + raise ValueError("%s not known." % self.significance) + # Return the calculated value + return pval
+ +
[docs] def get_measure(self, X, Y, Z=None, tau_max=0): + """Estimate dependence measure. + + Calls the dependence measure function. The child classes must specify + a function get_dependence_measure. + + Parameters + ---------- + X, Y [, Z] : list of tuples + X,Y,Z are of the form [(var, -tau)], where var specifies the + variable index and tau the time lag. + + tau_max : int, optional (default: 0) + Maximum time lag. This may be used to make sure that estimates for + different lags in X, Z, all have the same sample size. + + Returns + ------- + val : float + The test statistic value. + + """ + # Make the array + array, xyz, (X, Y, Z) = self._get_array(X, Y, Z, tau_max) + D, T = array.shape + # Check it is valid + if np.isnan(array).sum() != 0: + raise ValueError("nans in the array!") + # Return the dependence measure + return self._get_dependence_measure_recycle(X, Y, Z, xyz, array)
+ +
[docs] def get_confidence(self, X, Y, Z=None, tau_max=0): + """Perform confidence interval estimation. + + Calls the dependence measure and confidence test functions. The child + classes can specify a function get_dependence_measure and + get_analytic_confidence or get_bootstrap_confidence. If confidence is + False, (numpy.nan, numpy.nan) is returned. + + Parameters + ---------- + X, Y, Z : list of tuples + X,Y,Z are of the form [(var, -tau)], where var specifies the + variable index and tau the time lag. + + tau_max : int, optional (default: 0) + Maximum time lag. This may be used to make sure that estimates for + different lags in X, Z, all have the same sample size. + + Returns + ------- + (conf_lower, conf_upper) : Tuple of floats + Upper and lower confidence bound of confidence interval. + """ + # Check if a confidence type has been defined + if self.confidence: + # Ensure the confidence level given makes sense + if self.conf_lev < .5 or self.conf_lev >= 1.: + raise ValueError("conf_lev = %.2f, " % self.conf_lev + + "but must be between 0.5 and 1") + half_conf = self.conf_samples * (1. - self.conf_lev)/2. + if self.confidence == 'bootstrap' and half_conf < 1.: + raise ValueError("conf_samples*(1.-conf_lev)/2 is %.2f" + % half_conf + ", must be >> 1") + + if self.confidence: + # Make and check the array + array, xyz, _ = self._get_array(X, Y, Z, tau_max, verbosity=0) + dim, T = array.shape + if np.isnan(array).sum() != 0: + raise ValueError("nans in the array!") + + # Check if we are using analytic confidence or bootstrapping it + if self.confidence == 'analytic': + val = self.get_dependence_measure(array, xyz) + (conf_lower, conf_upper) = \ + self.get_analytic_confidence(df=T-dim, + value=val, + conf_lev=self.conf_lev) + elif self.confidence == 'bootstrap': + # Overwrite analytic values + (conf_lower, conf_upper) = \ + self.get_bootstrap_confidence( + array, xyz, + conf_samples=self.conf_samples, + conf_blocklength=self.conf_blocklength, + conf_lev=self.conf_lev, verbosity=self.verbosity) + else: + raise ValueError("%s confidence estimation not implemented" + % self.confidence) + else: + return None + + # Cache the confidence interval + self.conf = (conf_lower, conf_upper) + # Return the confidence interval + return (conf_lower, conf_upper)
+ + def _print_cond_ind_results(self, val, pval=None, cached=None, conf=None): + """Print results from conditional independence test. + + Parameters + ---------- + val : float + Test stastistic value. + + pval : float, optional (default: None) + p-value + + conf : tuple of floats, optional (default: None) + Confidence bounds. + """ + printstr = " val = % .3f" % (val) + if pval is not None: + printstr += " | pval = %.5f" % (pval) + if conf is not None: + printstr += " | conf bounds = (%.3f, %.3f)" % ( + conf[0], conf[1]) + if cached is not None: + printstr += " %s" % ({0:"", 1:"[cached]"}[cached]) + + print(printstr) + +
[docs] def get_bootstrap_confidence(self, array, xyz, dependence_measure=None, + conf_samples=100, conf_blocklength=None, + conf_lev=.95, verbosity=0): + """Perform bootstrap confidence interval estimation. + + With conf_blocklength > 1 or None a block-bootstrap is performed. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + dependence_measure : function (default = self.get_dependence_measure) + Dependence measure function must be of form + dependence_measure(array, xyz) and return a numeric value + + conf_lev : float, optional (default: 0.9) + Two-sided confidence interval. + + conf_samples : int, optional (default: 100) + Number of samples for bootstrap. + + conf_blocklength : int, optional (default: None) + Block length for block-bootstrap. If None, the block length is + determined from the decay of the autocovariance as explained in + [1]_. + + verbosity : int, optional (default: 0) + Level of verbosity. + + Returns + ------- + (conf_lower, conf_upper) : Tuple of floats + Upper and lower confidence bound of confidence interval. + """ + # Check if a dependence measure if provided or if to use default + if not dependence_measure: + dependence_measure = self.get_dependence_measure + + # confidence interval is two-sided + c_int = 1. - (1. - conf_lev)/2. + dim, T = array.shape + + # If not block length is given, determine the optimal block length. + # This has a maximum of 10% of the time sample length + if conf_blocklength is None: + conf_blocklength = \ + self._get_block_length(array, xyz, mode='confidence') + # Determine the number of blocks total, rounding up for non-integer + # amounts + n_blks = int(math.ceil(float(T)/conf_blocklength)) + + # Print some information + if verbosity > 2: + print(" block_bootstrap confidence intervals" + " with block-length = %d ..." % conf_blocklength) + + # Generate the block bootstrapped distribution + bootdist = np.zeros(conf_samples) + for smpl in range(conf_samples): + # Get the starting indices for the blocks + blk_strt = self.random_state.integers(0, T - conf_blocklength + 1, n_blks) + # Get the empty array of block resampled values + array_bootstrap = \ + np.zeros((dim, n_blks*conf_blocklength), dtype=array.dtype) + # Fill the array of block resamples + for i in range(conf_blocklength): + array_bootstrap[:, i::conf_blocklength] = array[:, blk_strt + i] + # Cut to proper length + array_bootstrap = array_bootstrap[:, :T] + + bootdist[smpl] = dependence_measure(array_bootstrap, xyz) + + # Sort and get quantile + bootdist.sort() + conf_lower = bootdist[int((1. - c_int) * conf_samples)] + conf_upper = bootdist[int(c_int * conf_samples)] + # Return the confidance limits as a tuple + return (conf_lower, conf_upper)
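A standalone numpy sketch of the block-resampling step used above (toy sizes; not tigramite code):

    import math
    import numpy as np

    rng = np.random.default_rng(0)
    dim, T, blocklen = 2, 100, 5
    array = rng.normal(size=(dim, T))
    n_blks = int(math.ceil(T / blocklen))

    starts = rng.integers(0, T - blocklen + 1, n_blks)  # random block start indices
    boot = np.zeros((dim, n_blks * blocklen))
    for i in range(blocklen):                           # copy blocks column by column
        boot[:, i::blocklen] = array[:, starts + i]
    boot = boot[:, :T]                                  # trim to the original length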
+ + def _get_acf(self, series, max_lag=None): + """Returns autocorrelation function. + + Parameters + ---------- + series : 1D-array + data series to compute autocorrelation from + + max_lag : int, optional (default: None) + maximum lag for autocorrelation function. If None is passed, 10% of + the data series length are used. + + Returns + ------- + autocorr : array of shape (max_lag + 1,) + Autocorrelation function. + """ + # Set the default max lag + if max_lag is None: + max_lag = int(max(5, 0.1*len(series))) + # Initialize the result + autocorr = np.ones(max_lag + 1) + # Iterate over possible lags + for lag in range(1, max_lag + 1): + # Set the values + y1_vals = series[lag:] + y2_vals = series[:len(series) - lag] + # Calculate the autocorrelation + autocorr[lag] = np.corrcoef(y1_vals, y2_vals, ddof=0)[0, 1] + return autocorr + + def _get_block_length(self, array, xyz, mode): + """Returns optimal block length for significance and confidence tests. + + Determine block length using approach in Mader (2013) [Eq. (6)] which + improves the method of Pfeifer (2005) with non-overlapping blocks In + case of multidimensional X, the max is used. Further details in [1]_. + Two modes are available. For mode='significance', only the indices + corresponding to X are shuffled in array. For mode='confidence' all + variables are jointly shuffled. If the autocorrelation curve fit fails, + a block length of 5% of T is used. The block length is limited to a + maximum of 10% of T. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + mode : str + Which mode to use. + + Returns + ------- + block_len : int + Optimal block length. + """ + # Inject a dependency on siganal, optimize + from scipy import signal, optimize + # Get the shape of the array + dim, T = array.shape + # Initiailize the indices + indices = range(dim) + if mode == 'significance': + indices = np.where(xyz == 0)[0] + + # Maximum lag for autocov estimation + max_lag = int(0.1*T) + # Define the function to optimize against + def func(x_vals, a_const, decay): + return a_const * decay**x_vals + + # Calculate the block length + block_len = 1 + for i in indices: + # Get decay rate of envelope of autocorrelation functions + # via hilbert trafo + autocov = self._get_acf(series=array[i], max_lag=max_lag) + autocov[0] = 1. + hilbert = np.abs(signal.hilbert(autocov)) + # Try to fit the curve + try: + popt, _ = optimize.curve_fit( + f=func, + xdata=np.arange(0, max_lag+1), + ydata=hilbert, + ) + phi = popt[1] + # Formula of Pfeifer (2005) assuming non-overlapping blocks + l_opt = (4. * T * (phi / (1. - phi) + phi**2 / (1. - phi)**2)**2 + / (1. + 2. * phi / (1. - phi))**2)**(1. / 3.) + block_len = max(block_len, int(l_opt)) + except RuntimeError: + print("Error - curve_fit failed in block_shuffle, using" + " block_len = %d" % (int(.05 * T))) + block_len = max(int(.05 * T), 2) + # Limit block length to a maximum of 10% of T + block_len = min(block_len, int(0.1 * T)) + return block_len + + def _get_shuffle_dist(self, array, xyz, dependence_measure, + sig_samples, sig_blocklength=None, + verbosity=0): + """Returns shuffle distribution of test statistic. + + The rows in array corresponding to the X-variable are shuffled using + a block-shuffle approach. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). 
+ + dependence_measure : object + Dependence measure function must be of form + dependence_measure(array, xyz) and return a numeric value + + sig_samples : int, optional (default: 100) + Number of samples for shuffle significance test. + + sig_blocklength : int, optional (default: None) + Block length for block-shuffle significance test. If None, the + block length is determined from the decay of the autocovariance as + explained in [1]_. + + verbosity : int, optional (default: 0) + Level of verbosity. + + Returns + ------- + null_dist : array of shape (sig_samples,) + Contains the sorted test statistic values estimated from the + shuffled arrays. + """ + + dim, T = array.shape + + x_indices = np.where(xyz == 0)[0] + dim_x = len(x_indices) + + if sig_blocklength is None: + sig_blocklength = self._get_block_length(array, xyz, + mode='significance') + + n_blks = int(math.floor(float(T)/sig_blocklength)) + # print 'n_blks ', n_blks + if verbosity > 2: + print(" Significance test with block-length = %d " + "..." % (sig_blocklength)) + + array_shuffled = np.copy(array) + block_starts = np.arange(0, T - sig_blocklength + 1, sig_blocklength) + + # Dividing the array up into n_blks of length sig_blocklength may + # leave a tail. This tail is later randomly inserted + tail = array[x_indices, n_blks*sig_blocklength:] + + null_dist = np.zeros(sig_samples) + for sam in range(sig_samples): + + blk_starts = self.random_state.permutation(block_starts)[:n_blks] + + x_shuffled = np.zeros((dim_x, n_blks*sig_blocklength), + dtype=array.dtype) + + for i, index in enumerate(x_indices): + for blk in range(sig_blocklength): + x_shuffled[i, blk::sig_blocklength] = \ + array[index, blk_starts + blk] + + # Insert tail randomly somewhere + if tail.shape[1] > 0: + insert_tail_at = self.random_state.choice(block_starts) + x_shuffled = np.insert(x_shuffled, insert_tail_at, + tail.T, axis=1) + + for i, index in enumerate(x_indices): + array_shuffled[index] = x_shuffled[i] + + null_dist[sam] = dependence_measure(array=array_shuffled, + xyz=xyz) + + return null_dist + +
[docs] def get_fixed_thres_significance(self, value, fixed_thres): + """Returns significance for thresholding test. + + Returns 1 if numpy.abs(value) is smaller than fixed_thres and 0 otherwise. + + Parameters + ---------- + value : number + Value of test statistic for unshuffled estimate. + + fixed_thres : number + Fixed threshold, is made positive. + + Returns + ------- + pval : float + Returns 1 if numpy.abs(value) is smaller than fixed_thres and 0 + otherwise. + + """ + if np.abs(value) < np.abs(fixed_thres): + pval = 1. + else: + pval = 0. + + return pval
+ + def _trafo2uniform(self, x): + """Transforms input array to uniform marginals. + + Assumes x.shape = (dim, T) + + Parameters + ---------- + x : array-like + Input array. + + Returns + ------- + u : array-like + array with uniform marginals. + """ + + def trafo(xi): + xisorted = np.sort(xi) + yi = np.linspace(1. / len(xi), 1, len(xi)) + return np.interp(xi, xisorted, yi) + + if np.ndim(x) == 1: + u = trafo(x) + else: + u = np.empty(x.shape) + for i in range(x.shape[0]): + u[i] = trafo(x[i]) + return u
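Worked example of the transform above: with three observations the empirical CDF maps the sorted values onto the uniform grid [1/3, 2/3, 1]:

    import numpy as np

    x = np.array([3., 1., 2.])
    u = np.interp(x, np.sort(x), np.linspace(1. / 3., 1., 3))
    # u == [1.0, 0.3333..., 0.6666...]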
+
+ +
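A minimal sketch of a concrete subclass (hypothetical test named AbsCorr; it assumes that, as for the tests above, only the measure property, get_dependence_measure, and one significance function need to be provided):

    import numpy as np

    class AbsCorr(CondIndTest):
        @property
        def measure(self):
            return self._measure

        def __init__(self, **kwargs):
            self._measure = 'abs_corr'
            self.two_sided = False
            self.residual_based = False
            CondIndTest.__init__(self, significance='shuffle_test', **kwargs)

        def get_dependence_measure(self, array, xyz):
            # Absolute correlation between the (averaged) X and Y rows;
            # ignores Z and is for illustration only.
            x = array[xyz == 0].mean(axis=0)
            y = array[xyz == 1].mean(axis=0)
            return np.abs(np.corrcoef(x, y)[0, 1])

        def get_shuffle_significance(self, array, xyz, value,
                                     return_null_dist=False):
            null_dist = self._get_shuffle_dist(array, xyz,
                                               self.get_dependence_measure,
                                               sig_samples=self.sig_samples,
                                               sig_blocklength=self.sig_blocklength)
            pval = (null_dist >= value).mean()
            if return_null_dist:
                return pval, null_dist
            return pval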
+
+
+
+
\ No newline at end of file
diff --git a/docs/_build/html/_modules/tigramite/independence_tests/oracle_conditional_independence.html b/docs/_build/html/_modules/tigramite/independence_tests/oracle_conditional_independence.html
new file mode 100644
index 00000000..82913261
--- /dev/null
+++ b/docs/_build/html/_modules/tigramite/independence_tests/oracle_conditional_independence.html
@@ -0,0 +1,1650 @@
+ tigramite.independence_tests.oracle_conditional_independence — Tigramite 5.0 documentation
+
+
+
+ +
Source code for tigramite.independence_tests.oracle_conditional_independence
+"""Tigramite causal discovery for time series."""
+
+# Author: Jakob Runge <jakob@jakob-runge.com>
+#
+# License: GNU General Public License v3.0
+
+from __future__ import print_function
+import numpy as np
+
+from collections import defaultdict, OrderedDict
+from itertools import combinations, permutations
+
+
+
[docs]class OracleCI: + r"""Oracle of conditional independence test X _|_ Y | Z given a graph. + + Class around link_coeff causal ground truth. X _|_ Y | Z is based on + assessing whether X and Y are d-separated given Z in the graph. + + Class can be used just like a Tigramite conditional independence class + (e.g., ParCorr). The main use is for unit testing of PCMCI methods. + + Parameters + ---------- + graph : array of shape [N, N, tau_max+1] + Causal graph. + links : dict + Dictionary of form {0:[(0, -1), ...], 1:[...], ...}. + Alternatively can also digest {0: [((0, -1), coeff, func)], ...}. + observed_vars : None or list, optional (default: None) + Subset of keys in links definining which variables are + observed. If None, then all variables are observed. + selection_vars : None or list, optional (default: None) + Subset of keys in links definining which variables are + selected (= always conditioned on at every time lag). + If None, then no variables are selected. + verbosity : int, optional (default: 0) + Level of verbosity. + """ + + # documentation + @property + def measure(self): + """ + Concrete property to return the measure of the independence test + """ + return self._measure + + def __init__(self, + links=None, + observed_vars=None, + selection_vars=None, + graph=None, + graph_is_mag=False, + tau_max=None, + verbosity=0): + + self.tau_max = tau_max + self.graph_is_mag = graph_is_mag + + if links is None: + if graph is None: + raise ValueError("Either links or graph must be specified!") + else: + # Get canonical DAG from graph, potentially interpreted as MAG + # self.tau_max = graph.shape[2] + (links, + observed_vars, + selection_vars) = self.get_links_from_graph(graph) + # # TODO make checks and tau_max? + # self.graph = graph + + + self.verbosity = verbosity + self._measure = 'oracle_ci' + self.confidence = None + self.links = links + self.N = len(links) + # self.tau_max = self._get_minmax_lag(self.links) + + # Initialize already computed dsepsets of X, Y, Z + self.dsepsets = {} + + # Initialize observed vars + self.observed_vars = observed_vars + if self.observed_vars is None: + self.observed_vars = range(self.N) + else: + if not set(self.observed_vars).issubset(set(range(self.N))): + raise ValueError("observed_vars must be subset of range(N).") + if self.observed_vars != sorted(self.observed_vars): + raise ValueError("observed_vars must ordered.") + if len(self.observed_vars) != len(set(self.observed_vars)): + raise ValueError("observed_vars must not contain duplicates.") + + self.selection_vars = selection_vars + + if self.selection_vars is not None: + if not set(self.selection_vars).issubset(set(range(self.N))): + raise ValueError("selection_vars must be subset of range(N).") + if self.selection_vars != sorted(self.selection_vars): + raise ValueError("selection_vars must ordered.") + if len(self.selection_vars) != len(set(self.selection_vars)): + raise ValueError("selection_vars must not contain duplicates.") + else: + self.selection_vars = [] + + # ToDO: maybe allow to use user-tau_max, otherwise deduced from links + self.graph = self.get_graph_from_links(tau_max=tau_max) + +
[docs] def set_dataframe(self, dataframe): + """Dummy function.""" + pass
+ + def _check_XYZ(self, X, Y, Z): + """Checks variables X, Y, Z. + + Parameters + ---------- + X, Y, Z : list of tuples + For a dependence measure I(X;Y|Z), Y is of the form [(varY, 0)], + where var specifies the variable index. X typically is of the form + [(varX, -tau)] with tau denoting the time lag and Z can be + multivariate [(var1, -lag), (var2, -lag), ...] . + + Returns + ------- + X, Y, Z : tuple + Cleaned X, Y, Z. + """ + # Get the length in time and the number of nodes + N = self.N + + # Remove duplicates in X, Y, Z + X = list(OrderedDict.fromkeys(X)) + Y = list(OrderedDict.fromkeys(Y)) + Z = list(OrderedDict.fromkeys(Z)) + + # If a node in Z occurs already in X or Y, remove it from Z + Z = [node for node in Z if (node not in X) and (node not in Y)] + + # Check that all lags are non-positive and indices are in [0,N-1] + XYZ = X + Y + Z + dim = len(XYZ) + # Ensure that XYZ makes sense + if np.array(XYZ).shape != (dim, 2): + raise ValueError("X, Y, Z must be lists of tuples in format" + " [(var, -lag),...], eg., [(2, -2), (1, 0), ...]") + if np.any(np.array(XYZ)[:, 1] > 0): + raise ValueError("nodes are %s, " % str(XYZ) + + "but all lags must be non-positive") + if (np.any(np.array(XYZ)[:, 0] >= N) + or np.any(np.array(XYZ)[:, 0] < 0)): + raise ValueError("var indices %s," % str(np.array(XYZ)[:, 0]) + + " but must be in [0, %d]" % (N - 1)) + if np.all(np.array(Y)[:, 1] != 0): + raise ValueError("Y-nodes are %s, " % str(Y) + + "but one of the Y-nodes must have zero lag") + + return (X, Y, Z) + + def _get_lagged_parents(self, var_lag, exclude_contemp=False, + only_non_causal_paths=False, X=None, causal_children=None): + """Helper function to yield lagged parents for var_lag from + self.links_coeffs. + + Parameters + ---------- + var_lag : tuple + Tuple of variable and lag which is assumed <= 0. + exclude_contemp : bool + Whether contemporaneous links should be exluded. + + Yields + ------ + Next lagged parent. + """ + + var, lag = var_lag + + for link_props in self.links[var]: + if len(link_props) == 3: + i, tau = link_props[0] + coeff = link_props[1] + else: + i, tau = link_props + coeff = 1. + if coeff != 0.: + if not (exclude_contemp and lag == 0): + if only_non_causal_paths: + if not ((i, lag + tau) in X and var_lag in causal_children): + yield (i, lag + tau) + else: + yield (i, lag + tau) + + def _get_children(self): + """Helper function to get children from links. + + Note that for children the lag is positive. + + Returns + ------- + children : dict + Dictionary of form {0:[(0, 1), (3, 0), ...], 1:[], ...}. + """ + + N = len(self.links) + children = dict([(j, []) for j in range(N)]) + + for j in range(N): + for link_props in self.links[j]: + if len(link_props) == 3: + i, tau = link_props[0] + coeff = link_props[1] + else: + i, tau = link_props + coeff = 1. + if coeff != 0.: + children[i].append((j, abs(tau))) + + return children + + def _get_lagged_children(self, var_lag, children, exclude_contemp=False, + only_non_causal_paths=False, X=None, causal_children=None): + """Helper function to yield lagged children for var_lag from children. + + Parameters + ---------- + var_lag : tuple + Tuple of variable and lag which is assumed <= 0. + children : dict + Dictionary of form {0:[(0, 1), (3, 0), ...], 1:[], ...}. + exclude_contemp : bool + Whether contemporaneous links should be exluded. + + Yields + ------ + Next lagged child. 
+ """ + + var, lag = var_lag + # lagged_parents = [] + + for child in children[var]: + k, tau = child + if not (exclude_contemp and tau == 0): + # lagged_parents.append((i, lag + tau)) + if only_non_causal_paths: + if not (var_lag in X and (k, lag + tau) in causal_children): + yield (k, lag + tau) + else: + yield (k, lag + tau) + + def _get_non_blocked_ancestors(self, Y, conds=None, mode='non_repeating', + max_lag=None): + """Helper function to return the non-blocked ancestors of variables Y. + + Returns a dictionary of ancestors for every y in Y. y is a tuple ( + var, lag) where lag <= 0. All ancestors with directed paths towards y + that are not blocked by conditions in conds are included. In mode + 'non_repeating' an ancestor X^i_{t-\tau_i} with link X^i_{t-\tau_i} + --> X^j_{ t-\tau_j} is only included if X^i_{t'-\tau_i} --> X^j_{ + t'-\tau_j} is not already part of the ancestors. The most lagged + ancestor for every variable X^i defines the maximum ancestral time + lag, which is also returned. In mode 'max_lag' ancestors are included + up to the maximum time lag max_lag. + + It's main use is to return the maximum ancestral time lag max_lag of + y in Y for every variable in self.links_coeffs. + + Parameters + ---------- + Y : list of tuples + Of the form [(var, -tau)], where var specifies the variable + index and tau the time lag. + conds : list of tuples + Of the form [(var, -tau)], where var specifies the variable + index and tau the time lag. + mode : {'non_repeating', 'max_lag'} + Whether repeating links should be excluded or ancestors should be + followed up to max_lag. + max_lag : int + Maximum time lag to include ancestors. + + Returns + ------- + ancestors : dict + Includes ancestors for every y in Y. + max_lag : int + Maximum time lag to include ancestors. + """ + + def _repeating(link, seen_links): + """Returns True if a link or its time-shifted version is already + included in seen_links.""" + i, taui = link[0] + j, tauj = link[1] + + for seen_link in seen_links: + seen_i, seen_taui = seen_link[0] + seen_j, seen_tauj = seen_link[1] + + if (i == seen_i and j == seen_j + and abs(tauj-taui) == abs(seen_tauj-seen_taui)): + return True + + return False + + if conds is None: + conds = [] + + conds = [z for z in conds if z not in Y] + + N = len(self.links) + + # Initialize max. 
ancestral time lag for every N + if mode == 'non_repeating': + max_lag = 0 + else: + if max_lag is None: + raise ValueError("max_lag must be set in mode = 'max_lag'") + + if self.selection_vars is not None: + for selection_var in self.selection_vars: + # print (selection_var, conds) + # print([(selection_var, -tau_sel) for tau_sel in range(0, max_lag + 1)]) + conds += [(selection_var, -tau_sel) for tau_sel in range(0, max_lag + 1)] + + ancestors = dict([(y, []) for y in Y]) + + for y in Y: + j, tau = y # tau <= 0 + if mode == 'non_repeating': + max_lag = max(max_lag, abs(tau)) + seen_links = [] + this_level = [y] + while len(this_level) > 0: + next_level = [] + for varlag in this_level: + for par in self._get_lagged_parents(varlag): + i, tau = par + if par not in conds and par not in ancestors[y]: + if ((mode == 'non_repeating' and + not _repeating((par, varlag), seen_links)) or + (mode == 'max_lag' and + abs(tau) <= abs(max_lag))): + ancestors[y].append(par) + if mode == 'non_repeating': + max_lag = max(max_lag, + abs(tau)) + next_level.append(par) + seen_links.append((par, varlag)) + + this_level = next_level + + return ancestors, max_lag + + def _get_maximum_possible_lag(self, XYZ): + """Helper function to return the maximum time lag of any confounding path. + + This is still based on a conjecture! + + The conjecture states that if and only if X and Y are d-connected given Z + in a stationary DAG, then there exists a confounding path with a maximal + time lag (i.e., the node on that path with maximal lag) given as follows: + For any node in XYZ consider all non-repeating causal paths from the past + to that node, where non-repeating means that a link X^i_{t-\tau_i} + --> X^j_{ t-\tau_j} is only traversed if X^i_{t'-\tau_i} --> X^j_{ + t'-\tau_j} is not already part of that path. The most lagged + ancestor for every variable node in XYZ defines the maximum ancestral time + lag, which is returned. + + Parameters + ---------- + XYZ : list of tuples + Of the form [(var, -tau)], where var specifies the variable + index and tau the time lag. + + Returns + ------- + max_lag : int + Maximum time lag of non-repeating causal path ancestors. + """ + + def _repeating(link, seen_path): + """Returns True if a link or its time-shifted version is already + included in seen_links.""" + i, taui = link[0] + j, tauj = link[1] + + for index, seen_link in enumerate(seen_path[:-1]): + seen_i, seen_taui = seen_link + seen_j, seen_tauj = seen_path[index + 1] + + if (i == seen_i and j == seen_j + and abs(tauj-taui) == abs(seen_tauj-seen_taui)): + return True + + return False + + N = len(self.links) + + # Initialize max. ancestral time lag for every N + max_lag = 0 + + # Not sure whether this is relevant! 
+ # if self.selection_vars is not None: + # for selection_var in self.selection_vars: + # # print (selection_var, conds) + # # print([(selection_var, -tau_sel) for tau_sel in range(0, max_lag + 1)]) + # conds += [(selection_var, -tau_sel) for tau_sel in range(0, max_lag + 1)] + + # ancestors = dict([(y, []) for y in Y]) + + for y in XYZ: + j, tau = y # tau <= 0 + max_lag = max(max_lag, abs(tau)) + + causal_path = [] + queue = [(y, causal_path)] + + while queue: + varlag, causal_path = queue.pop() + causal_path = [varlag] + causal_path + + for node in self._get_lagged_parents(varlag): + i, tau = node + + if (node not in causal_path): + + if len(causal_path) == 1: + queue.append((node, causal_path)) + continue + + if (len(causal_path) > 1) and not _repeating((node, varlag), causal_path): + + max_lag = max(max_lag, abs(tau)) + queue.append((node, causal_path)) + + if self.verbosity > 0: + print("Max. non-repeated ancestral time lag: ", max_lag) + + return max_lag + + def _get_descendants(self, W, children, max_lag, ignore_time_bounds=False): + """Get descendants of nodes in W up to time t. + + Includes the nodes themselves. + """ + + descendants = set(W) + + for w in W: + j, tau = w + this_level = [w] + while len(this_level) > 0: + next_level = [] + for varlag in this_level: + for child in self._get_lagged_children(varlag, children): + i, tau = child + if (child not in descendants + and (-max_lag <= tau <= 0 or ignore_time_bounds)): + descendants = descendants.union(set([child])) + next_level.append(child) + + this_level = next_level + + return list(descendants) + + def _has_any_path(self, X, Y, conds, max_lag=None, + starts_with=None, ends_with=None, + directed=False, + forbidden_nodes=None, + only_non_causal_paths=False, + check_optimality_cond=False, + optimality_cond_des_YM=None, + optimality_cond_Y=None, + only_collider_paths_with_vancs=False, + XYS=None, + return_path=False): + """Returns True if X and Y are d-connected by any open path. + + Does breadth-first search from both X and Y and meets in the middle. + Paths are walked according to the d-separation rules where paths can + only traverse motifs <-- v <-- or <-- v --> or --> v --> or + --> [v] <-- where [.] indicates that v is conditioned on. + Furthermore, paths nodes (v, t) need to fulfill max_lag <= t <= 0 + and links cannot be traversed backwards. + + Parameters + ---------- + X, Y : lists of tuples + Of the form [(var, -tau)], where var specifies the variable + index and tau the time lag. + conds : list of tuples + Of the form [(var, -tau)], where var specifies the variable + index and tau the time lag. + max_lag : int + Maximum time lag. + starts_with : {None, 'tail', 'arrohead'} + Whether to only consider paths starting with particular mark at X. + ends_with : {None, 'tail', 'arrohead'} + Whether to only consider paths ending with particular mark at Y. 
+ """ + if max_lag is None: + if conds is None: + conds = [] + max_lag = self._get_maximum_possible_lag(X+Y+conds) + + def _walk_to_parents(v, fringe, this_path, other_path): + """Helper function to update paths when walking to parents.""" + found_connection = False + for w in self._get_lagged_parents(v, + only_non_causal_paths=only_non_causal_paths, X=X, + causal_children=causal_children): + # Cannot walk into conditioned parents and + # cannot walk beyond t or max_lag + i, t = w + + if w == x and starts_with == 'arrowhead': + continue + + if w == y and ends_with == 'arrowhead': + continue + + if (w not in conds and w not in forbidden_nodes and + # (w, v) not in seen_links and + t <= 0 and abs(t) <= max_lag): + # if ((w, 'tail') not in this_path and + # (w, None) not in this_path): + if (w not in this_path or + ('tail' not in this_path[w] and None not in this_path[w])): + if self.verbosity > 1: + print("Walk parent: %s --> %s " %(v, w)) + fringe.append((w, 'tail')) + if w not in this_path: + this_path[w] = {'tail' : (v, 'arrowhead')} + else: + this_path[w]['tail'] = (v, 'arrowhead') + # seen_links.append((v, w)) + # Determine whether X and Y are connected + # (w, None) indicates the start or end node X/Y + # if ((w, 'tail') in other_path + # or (w, 'arrowhead') in other_path + # or (w, None) in other_path): + if w in other_path: + found_connection = (w, 'tail') + if self.verbosity > 1: + print("Found connection: ", found_connection) + break + return found_connection, fringe, this_path + + def _walk_to_children(v, fringe, this_path, other_path): + """Helper function to update paths when walking to children.""" + found_connection = False + for w in self._get_lagged_children(v, children, + only_non_causal_paths=only_non_causal_paths, X=X, + causal_children=causal_children): + # You can also walk into conditioned children, + # but cannot walk beyond t or max_lag + i, t = w + + if w == x and starts_with == 'tail': + continue + + if w == y and ends_with == 'tail': + continue + + if (w not in forbidden_nodes and + # (w, v) not in seen_links and + t <= 0 and abs(t) <= max_lag): + # if ((w, 'arrowhead') not in this_path and + # (w, None) not in this_path): + if (w not in this_path or + ('arrowhead' not in this_path[w] and None not in this_path[w])): + if self.verbosity > 1: + print("Walk child: %s --> %s " %(v, w)) + fringe.append((w, 'arrowhead')) + # this_path[(w, 'arrowhead')] = (v, 'tail') + if w not in this_path: + this_path[w] = {'arrowhead' : (v, 'tail')} + else: + this_path[w]['arrowhead'] = (v, 'tail') + # seen_links.append((v, w)) + # Determine whether X and Y are connected + # If the other_path contains w with a tail, then w must + # NOT be conditioned on. Alternatively, if the other_path + # contains w with an arrowhead, then w must be + # conditioned on. 
+ # if (((w, 'tail') in other_path and w not in conds) + # or ((w, 'arrowhead') in other_path and w in conds) + # or (w, None) in other_path): + if w in other_path: + if (('tail' in other_path[w] and w not in conds) or + ('arrowhead' in other_path[w] and w in conds) or + (None in other_path[w])): + found_connection = (w, 'arrowhead') + if self.verbosity > 1: + print("Found connection: ", found_connection) + break + return found_connection, fringe, this_path + + def _walk_fringe(this_level, fringe, this_path, other_path): + """Helper function to walk each fringe, i.e., the path from X and Y, + respectively.""" + found_connection = False + + if starts_with == 'arrowhead': + if len(this_level) == 1 and this_level[0] == (x, None): + (found_connection, fringe, + this_path) = _walk_to_parents(x, fringe, + this_path, other_path) + return found_connection, fringe, this_path, other_path + + elif starts_with == 'tail': + if len(this_level) == 1 and this_level[0] == (x, None): + (found_connection, fringe, + this_path) = _walk_to_children(x, fringe, + this_path, other_path) + return found_connection, fringe, this_path, other_path + + if ends_with == 'arrowhead': + if len(this_level) == 1 and this_level[0] == (y, None): + (found_connection, fringe, + this_path) = _walk_to_parents(y, fringe, + this_path, other_path) + return found_connection, fringe, this_path, other_path + + elif ends_with == 'tail': + if len(this_level) == 1 and this_level[0] == (y, None): + (found_connection, fringe, + this_path) = _walk_to_children(y, fringe, + this_path, other_path) + return found_connection, fringe, this_path, other_path + + for v, mark in this_level: + if v in conds: + if (mark == 'arrowhead' or mark == None) and directed is False: + # Motif: --> [v] <-- + # If standing on a condition and coming from an + # arrowhead, you can only walk into parents + (found_connection, fringe, + this_path) = _walk_to_parents(v, fringe, + this_path, other_path) + if found_connection: break + else: + if only_collider_paths_with_vancs: + continue + + if (mark == 'tail' or mark == None): + # Motif: <-- v <-- or <-- v --> + # If NOT standing on a condition and coming from + # a tail mark, you can walk into parents or + # children + (found_connection, fringe, + this_path) = _walk_to_parents(v, fringe, + this_path, other_path) + if found_connection: break + + if not directed: + (found_connection, fringe, + this_path) = _walk_to_children(v, fringe, + this_path, other_path) + if found_connection: break + + elif mark == 'arrowhead': + # Motif: --> v --> + # If NOT standing on a condition and coming from + # an arrowhead mark, you can only walk into + # children + (found_connection, fringe, + this_path) = _walk_to_children(v, fringe, + this_path, other_path) + if found_connection: break + + if check_optimality_cond and v[0] in self.observed_vars: + # if v is not descendant of YM + # and v is not connected to Y given X OS\Cu + # print("v = ", v) + cond4a = v not in optimality_cond_des_YM + cond4b = not self._has_any_path(X=[v], Y=optimality_cond_Y, + conds=conds + X, + max_lag=None, + starts_with=None, + ends_with=None, + forbidden_nodes=None, #list(prelim_Oset), + return_path=False) + # print(cond4a, cond4b) + if cond4a and cond4b: + (found_connection, fringe, + this_path) = _walk_to_parents(v, fringe, + this_path, other_path) + # print(found_connection) + if found_connection: break + + if self.verbosity > 1: + print("Updated fringe: ", fringe) + return found_connection, fringe, this_path, other_path + + def backtrace_path(): + """Helper 
function to get path from start point, end point, + and connection found.""" + + path = [found_connection[0]] + node, mark = found_connection + + if 'tail' in pred[node]: + mark = 'tail' + else: + mark = 'arrowhead' + # print(found_connection) + while path[-1] != x: + # print(path, node, mark, pred[node]) + prev_node, prev_mark = pred[node][mark] + path.append(prev_node) + if prev_mark == 'arrowhead': + if prev_node not in conds: + # if pass_through_colliders: + # if 'tail' in pred[prev_node] and pred[prev_node]['tail'] != (node, mark): + # mark = 'tail' + # else: + # mark = 'arrowhead' + # else: + mark = 'tail' + elif prev_node in conds: + mark = 'arrowhead' + elif prev_mark == 'tail': + if 'tail' in pred[prev_node] and pred[prev_node]['tail'] != (node, mark): + mark = 'tail' + else: + mark = 'arrowhead' + node = prev_node + + path.reverse() + + node, mark = found_connection + if 'tail' in succ[node]: + mark = 'tail' + else: + mark = 'arrowhead' + + while path[-1] != y: + next_node, next_mark = succ[node][mark] + path.append(next_node) + if next_mark == 'arrowhead': + if next_node not in conds: + # if pass_through_colliders: + # if 'tail' in succ[next_node] and succ[next_node]['tail'] != (node, mark): + # mark = 'tail' + # else: + # mark = 'arrowhead' + # else: + mark = 'tail' + elif next_node in conds: + mark = 'arrowhead' + elif next_mark == 'tail': + if 'tail' in succ[next_node] and succ[next_node]['tail'] != (node, mark): + mark = 'tail' + else: + mark = 'arrowhead' + node = next_node + + return path + + + if conds is None: + conds = [] + + if forbidden_nodes is None: + forbidden_nodes = [] + + conds = [z for z in conds if z not in Y and z not in X] + # print(X, Y, conds) + + if self.selection_vars is not None: + for selection_var in self.selection_vars: + conds += [(selection_var, -tau_sel) for tau_sel in range(0, max_lag + 1)] + + + N = len(self.links) + children = self._get_children() + + if only_non_causal_paths: + anc_Y_dict = self._get_non_blocked_ancestors(Y=Y, conds=None, mode='max_lag', + max_lag=max_lag)[0] + # print(anc_Y_dict) + anc_Y = [] + for y in Y: + anc_Y += anc_Y_dict[y] + des_X = self._get_descendants(X, children=children, max_lag=max_lag) + mediators = set(anc_Y).intersection(set(des_X)) - set(Y) - set(X) + + causal_children = list(mediators) + Y + else: + causal_children = None + + if only_collider_paths_with_vancs: + vancs_dict = self._get_non_blocked_ancestors(Y=XYS, conds=None, mode='max_lag', + max_lag=max_lag)[0] + vancs = set() + for xys in XYS: + vancs = vancs.union(set(vancs_dict[xys])) + vancs = list(vancs) + XYS + conds = vancs + # else: + # vancs = None + + # Iterate through nodes in X and Y + for x in X: + for y in Y: + + # seen_links = [] + # predecessor and successors in search + # (x, None) where None indicates start/end nodes, later (v, + # 'tail') or (w, 'arrowhead') indicate how a link ends at a node + pred = {x : {None: None}} + succ = {y : {None: None}} + + # initialize fringes, start with forward from X + forward_fringe = [(x, None)] + reverse_fringe = [(y, None)] + + while forward_fringe and reverse_fringe: + if len(forward_fringe) <= len(reverse_fringe): + if self.verbosity > 1: + print("Walk from X since len(X_fringe)=%d " + "<= len(Y_fringe)=%d" % (len(forward_fringe), + len(reverse_fringe))) + this_level = forward_fringe + forward_fringe = [] + (found_connection, forward_fringe, pred, + succ) = _walk_fringe(this_level, forward_fringe, pred, + succ) + + # print(pred) + if found_connection: + if return_path: + backtraced_path = 
backtrace_path() + return [(self.observed_vars.index(node[0]), node[1]) + for node in backtraced_path + if node[0] in self.observed_vars] + else: + return True + else: + if self.verbosity > 1: + print("Walk from Y since len(X_fringe)=%d " + "> len(Y_fringe)=%d" % (len(forward_fringe), + len(reverse_fringe))) + this_level = reverse_fringe + reverse_fringe = [] + (found_connection, reverse_fringe, succ, + pred) = _walk_fringe(this_level, reverse_fringe, succ, + pred) + + if found_connection: + if return_path: + backtraced_path = backtrace_path() + return [(self.observed_vars.index(node[0]), node[1]) + for node in backtraced_path + if node[0] in self.observed_vars] + else: + return True + + if self.verbosity > 1: + print("X_fringe = %s \n" % str(forward_fringe) + + "Y_fringe = %s" % str(reverse_fringe)) + + return False + + def _is_dsep(self, X, Y, Z, max_lag=None): + """Returns whether X and Y are d-separated given Z in the graph. + + X, Y, Z are of the form (var, lag) for lag <= 0. D-separation is + based on: + + 1. Assessing the maximum time lag max_lag possible for any confounding + path (see _get_maximum_possible_lag(...)). + + 2. Using the time series graph truncated at max_lag we then test + d-separation between X and Y conditional on Z using breadth-first + search of non-blocked paths according to d-separation rules. + + Parameters + ---------- + X, Y, Z : list of tuples + List of variables chosen for current independence test. + max_lag : int, optional (default: None) + Used here to constrain the _is_dsep function to the graph + truncated at max_lag instead of identifying the max_lag from + ancestral search. + + Returns + ------- + dseparated : bool, or path + True if X and Y are d-separated given Z in the graph. + """ + + N = len(self.links) + + if self.verbosity > 0: + print("Testing X=%s d-sep Y=%s given Z=%s in TSG" %(X, Y, Z)) + + if Z is None: + Z = [] + + if max_lag is not None: + # max_lags = dict([(j, max_lag) for j in range(N)]) + if self.verbosity > 0: + print("Set max. time lag to: ", max_lag) + else: + max_lag = self._get_maximum_possible_lag(X+Y+Z) + + # Store overall max. lag + self.max_lag = max_lag + + # _has_any_path is the main function that searches open paths + any_path = self._has_any_path(X, Y, conds=Z, max_lag=max_lag) + + if any_path: + dseparated = False + else: + dseparated = True + + return dseparated + +
[docs] def check_shortest_path(self, X, Y, Z, + max_lag=None, # compute_ancestors=False, + starts_with=None, ends_with=None, + forbidden_nodes=None, + directed=False, + only_non_causal_paths=False, + check_optimality_cond=False, + optimality_cond_des_YM=None, + optimality_cond_Y=None, + return_path=False): + """Returns path between X and Y given Z in the graph. + + X, Y, Z are of the form (var, lag) for lag <= 0. D-separation is + based on: + + 1. Assessing maximum time lag max_lag of last ancestor of any X, Y, Z + with non-blocked (by Z), non-repeating directed path towards X, Y, Z + in the graph. 'non_repeating' means that an ancestor X^i_{ t-\tau_i} + with link X^i_{t-\tau_i} --> X^j_{ t-\tau_j} is only included if + X^i_{t'-\tau_i} --> X^j_{ t'-\tau_j} for t'!=t is not already part of + the ancestors. + + 2. Using the time series graph truncated at max_lag we then test + d-separation between X and Y conditional on Z using breadth-first + search of non-blocked paths according to d-separation rules including + selection variables. + + Optionally only considers paths starting/ending with specific marks) + and makes available the ancestors up to max_lag of X, Y, Z. This may take + a very long time, however. + + Parameters + ---------- + X, Y, Z : list of tuples + List of variables chosen for testing paths. + max_lag : int, optional (default: None) + Used here to constrain the has_path function to the graph + truncated at max_lag instead of identifying the max_lag from + ancestral search. + compute_ancestors : bool + Whether to also make available the ancestors for X, Y, Z as + self.anc_all_x, self.anc_all_y, and self.anc_all_z, respectively. + starts_with : {None, 'tail', 'arrohead'} + Whether to only consider paths starting with particular mark at X. + ends_with : {None, 'tail', 'arrohead'} + Whether to only consider paths ending with particular mark at Y. + + Returns + ------- + path : list or False + Returns path or False if no path exists. + """ + + N = len(self.links) + + # Translate from observed_vars index to full variable set index + X = [(self.observed_vars[x[0]], x[1]) for x in X] + Y = [(self.observed_vars[y[0]], y[1]) for y in Y] + Z = [(self.observed_vars[z[0]], z[1]) for z in Z] + + # print(X) + # print(Y) + # print(Z) + + if check_optimality_cond: + optimality_cond_des_YM = [(self.observed_vars[x[0]], x[1]) + for x in optimality_cond_des_YM] + optimality_cond_Y = [(self.observed_vars[x[0]], x[1]) + for x in optimality_cond_Y] + + # Get the array to test on + X, Y, Z = self._check_XYZ(X, Y, Z) + + if self.verbosity > 0: + print("Testing X=%s d-sep Y=%s given Z=%s in TSG" %(X, Y, Z)) + + if max_lag is not None: + # max_lags = dict([(j, max_lag) for j in range(N)]) + if self.verbosity > 0: + print("Set max. time lag to: ", max_lag) + else: + max_lag = self._get_maximum_possible_lag(X+Y+Z) + + # Store overall max. 
lag + self.max_lag = max_lag + + # _has_any_path is the main function that searches open paths + any_path = self._has_any_path(X, Y, conds=Z, max_lag=max_lag, + starts_with=starts_with, ends_with=ends_with, + return_path=return_path, + directed=directed, + only_non_causal_paths=only_non_causal_paths, + check_optimality_cond=check_optimality_cond, + optimality_cond_des_YM=optimality_cond_des_YM, + optimality_cond_Y=optimality_cond_Y, + forbidden_nodes=forbidden_nodes) + + if any_path: + if return_path: + any_path_observed = [(self.observed_vars.index(node[0]), node[1]) for node in any_path + if node[0] in self.observed_vars] + else: + any_path_observed = True + else: + any_path_observed = False + + if self.verbosity > 0: + print("_has_any_path = ", any_path) + print("_has_any_path_obs = ", any_path_observed) + + + # if compute_ancestors: + # if self.verbosity > 0: + # print("Compute ancestors.") + + # # Get ancestors up to maximum ancestral time lag incl. repeated + # # links + # self.anc_all_x, _ = self._get_non_blocked_ancestors(X, conds=Z, + # mode='max_lag', max_lag=max_lag) + # self.anc_all_y, _ = self._get_non_blocked_ancestors(Y, conds=Z, + # mode='max_lag', max_lag=max_lag) + # self.anc_all_z, _ = self._get_non_blocked_ancestors(Z, conds=Z, + # mode='max_lag', max_lag=max_lag) + + return any_path_observed
+ +
[docs] def run_test(self, X, Y, Z=None, tau_max=0, cut_off='2xtau_max', + verbosity=0): + """Perform oracle conditional independence test. + + Calls the d-separation function. + + Parameters + ---------- + X, Y, Z : list of tuples + X,Y,Z are of the form [(var, -tau)], where var specifies the + variable index in the observed_vars and tau the time lag. + tau_max : int, optional (default: 0) + Not used here. + cut_off : {'2xtau_max', 'max_lag', 'max_lag_or_tau_max'} + Not used here. + + Returns + ------- + val, pval : Tuple of floats + The test statistic value and the p-value. + """ + + # Translate from observed_vars index to full variable set index + X = [(self.observed_vars[x[0]], x[1]) for x in X] + Y = [(self.observed_vars[y[0]], y[1]) for y in Y] + Z = [(self.observed_vars[z[0]], z[1]) for z in Z] + + # Get the array to test on + X, Y, Z = self._check_XYZ(X, Y, Z) + + if not str((X, Y, Z)) in self.dsepsets: + self.dsepsets[str((X, Y, Z))] = self._is_dsep(X, Y, Z) + + if self.dsepsets[str((X, Y, Z))]: + val = 0. + pval = 1. + else: + val = 1. + pval = 0. + + if verbosity > 1: + self._print_cond_ind_results(val=val, pval=pval, cached=False, + conf=None) + # Return the value and the pvalue + return val, pval
+ +
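A minimal usage sketch of the oracle test above (not part of the module): run_test maps d-separation in the assumed ground-truth graph to (val, pval) = (0., 1.) or (1., 0.). The import path is an assumption and may differ between tigramite versions; the links dictionary uses the same (var, lag) convention as the __main__ example further below.

import numpy as np
from tigramite.independence_tests import OracleCI  # import path assumed

# Assumed ground truth: X0 autocorrelated, X0_{t-1} -> X1_t, X1_{t-1} -> X2_t
links = {0: [(0, -1)], 1: [(0, -1)], 2: [(1, -1)]}
oracle = OracleCI(links=links, observed_vars=[0, 1, 2])

# Conditioning on the mediator X1_{t-1} d-separates X0_{t-2} and X2_t: (0., 1.)
print(oracle.run_test(X=[(0, -2)], Y=[(2, 0)], Z=[(1, -1)]))
# Without the mediator the directed path is open: (1., 0.)
print(oracle.run_test(X=[(0, -2)], Y=[(2, 0)], Z=[]))
# check_shortest_path returns the connecting (var, lag) nodes, or False if blocked
print(oracle.check_shortest_path(X=[(0, -2)], Y=[(2, 0)], Z=[], return_path=True))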
[docs] def get_measure(self, X, Y, Z=None, tau_max=0): + """Returns dependence measure. + + Returns 0 if X and Y are d-separated given Z in the graph and 1 otherwise. + + Parameters + ---------- + X, Y [, Z] : list of tuples + X,Y,Z are of the form [(var, -tau)], where var specifies the + variable index in the observed_vars and tau the time lag. + + tau_max : int, optional (default: 0) + Maximum time lag. This may be used to make sure that estimates for + different lags in X, Z, all have the same sample size. + + Returns + ------- + val : float + The test statistic value. + + """ + + # Translate from observed_vars index to full variable set index + X = [(self.observed_vars[x[0]], x[1]) for x in X] + Y = [(self.observed_vars[y[0]], y[1]) for y in Y] + Z = [(self.observed_vars[z[0]], z[1]) for z in Z] + + # Check XYZ + X, Y, Z = self._check_XYZ(X, Y, Z) + + if not str((X, Y, Z)) in self.dsepsets: + self.dsepsets[str((X, Y, Z))] = self._is_dsep(X, Y, Z) + + if self.dsepsets[str((X, Y, Z))]: + return 0. + else: + return 1.
+ + def _print_cond_ind_results(self, val, pval=None, cached=None, conf=None): + """Print results from conditional independence test. + + Parameters + ---------- + val : float + Test statistic value. + pval : float, optional (default: None) + p-value + cached : bool, optional (default: None) + Whether the result was retrieved from the cache. + conf : tuple of floats, optional (default: None) + Confidence bounds. + """ + printstr = " val = %.3f" % (val) + if pval is not None: + printstr += " | pval = %.5f" % (pval) + if conf is not None: + printstr += " | conf bounds = (%.3f, %.3f)" % ( + conf[0], conf[1]) + if cached is not None: + printstr += " %s" % ({0:"", 1:"[cached]"}[cached]) + + print(printstr) + +
[docs] def get_model_selection_criterion(self, j, parents, tau_max=0): + """ + Base class assumption that this is not implemented. Concrete classes + should override when possible. + """ + raise NotImplementedError("Model selection not"+\ + " implemented for %s" % self.measure)
+ + def _reverse_patt(self, patt): + """Inverts a link pattern""" + + if patt == "": + return "" + + left_mark, middle_mark, right_mark = patt[0], patt[1], patt[2] + if left_mark == "<": + new_right_mark = ">" + else: + new_right_mark = left_mark + if right_mark == ">": + new_left_mark = "<" + else: + new_left_mark = right_mark + + return new_left_mark + middle_mark + new_right_mark + + + + + def _get_minmax_lag(self, links): + """Helper function to retrieve tau_min and tau_max from links + """ + + N = len(links) + + # Get maximum time lag + min_lag = np.inf + max_lag = 0 + for j in range(N): + for link_props in links[j]: + if len(link_props) == 3: + i, lag = link_props[0] + coeff = link_props[1] + else: + i, lag = link_props + coeff = 1. + # func = link_props[2] + if coeff != 0.: + min_lag = min(min_lag, abs(lag)) + max_lag = max(max_lag, abs(lag)) + return min_lag, max_lag + + + +
[docs] def get_confidence(self, X, Y, Z=None, tau_max=0): + """For compatibility with PCMCI. + + Returns + ------- + None + """ + return None
+ +if __name__ == '__main__': + + import tigramite.plotting as tp + from matplotlib import pyplot as plt + def lin_f(x): return x + + # Define the stationary DAG + links = {0 : [(0, -3), (1, 0)], 1: [(2, -2)], 2: [(1, -2)]} + observed_vars = [0, 1, 2] + + oracle = OracleCI(links=links, + observed_vars=observed_vars, + graph_is_mag=True, + # selection_vars=selection_vars, + # verbosity=2 + ) + graph = oracle.graph + print(graph[:,:,0]) + + tp.plot_time_series_graph(graph=graph, var_names=None, figsize=(5, 5), + save_name="/home/rung_ja/Downloads/tsg.pdf") + + X = [(0, 0)] + Y = [(2, 0)] + Z = [] + # node = (3, 0) + # prelim_Oset = set([(3, 0)]) + # S = set([]) + # collider_path_nodes = set([]) + path = oracle._has_any_path(X=X, Y=Y, + conds=Z, + max_lag=8, + starts_with='arrowhead', + ends_with='arrowhead', + forbidden_nodes=None, + return_path=True) + print(path) + + print("-------------------------------") + print(oracle._get_maximum_possible_lag(X+Z)) #(X = X, Y = Y, Z = Z)) + + # cond_ind_test = OracleCI(graph=graph) + # links, observed_vars, selection_vars = cond_ind_test.get_links_from_graph(graph) + # print("{") + # for j in links.keys(): + # parents = repr([(p, 'coeff', 'lin_f') for p in links[j]]) + # print(f"{j: 1d}" ":" f"{parents:s},") + # print(repr(observed_vars)) + # cond_ind_test = OracleCI(graph=graph, verbosity=2) + + # X = [(0, 0)] + # Y = [(2, 0)] + # Z = [(7, 0), (3, 0), (6, 0), (5, 0), (4, 0)] #(1, -3), (1, -2), (0, -2), (0, -1), (0, -3)] + # #(j, -2) for j in range(N)] + [(j, 0) for j in range(N)] + + # # print(oracle._get_non_blocked_ancestors(Z, Z=None, mode='max_lag', + # # max_lag=2)) + # # cond_ind_test = OracleCI(links, observed_vars=observed_vars, verbosity=2) + + # print(cond_ind_test.get_shortest_path(X=X, Y=Y, Z=Z, + # max_lag=None, compute_ancestors=False, + # backdoor=True)) + + # anc_x=None #oracle.anc_all_x[X[0]] + # anc_y=None #oracle.anc_all_y[Y[0]] + # anc_xy=None # [] + # # # for z in Z: + # # # anc_xy += oracle.anc_all_z[z] + + # fig, ax = tp.plot_tsg(links, + # X=[(observed_vars[x[0]], x[1]) for x in X], + # Y=[(observed_vars[y[0]], y[1]) for y in Y], + # Z=[(observed_vars[z[0]], z[1]) for z in Z], + # anc_x=anc_x, anc_y=anc_y, + # anc_xy=anc_xy) + + # fig.savefig("/home/rung_ja/Downloads/tsg.pdf") +
+ +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/_build/html/_modules/tigramite/independence_tests/parcorr.html b/docs/_build/html/_modules/tigramite/independence_tests/parcorr.html new file mode 100644 index 00000000..0ad353ca --- /dev/null +++ b/docs/_build/html/_modules/tigramite/independence_tests/parcorr.html @@ -0,0 +1,388 @@ + + + + + + + tigramite.independence_tests.parcorr — Tigramite 5.0 documentation + + + + + + + + + + + + + +
+
+
+
+ +

Source code for tigramite.independence_tests.parcorr

+"""Tigramite causal discovery for time series."""
+
+# Author: Jakob Runge <jakob@jakob-runge.com>
+#
+# License: GNU General Public License v3.0
+
+from __future__ import print_function
+from scipy import stats
+import numpy as np
+import sys
+
+from .independence_tests_base import CondIndTest
+
+
[docs]class ParCorr(CondIndTest): + r"""Partial correlation test. + + Partial correlation is estimated through linear ordinary least squares (OLS) + regression and a test for non-zero linear Pearson correlation on the + residuals. + + Notes + ----- + To test :math:`X \perp Y | Z`, first :math:`Z` is regressed out from + :math:`X` and :math:`Y` assuming the model + + .. math:: X & = Z \beta_X + \epsilon_{X} \\ + Y & = Z \beta_Y + \epsilon_{Y} + + using OLS regression. Then the dependency of the residuals is tested with + the Pearson correlation test. + + .. math:: \rho\left(r_X, r_Y\right) + + For the ``significance='analytic'`` Student's-*t* distribution with + :math:`T-D_Z-2` degrees of freedom is implemented. + + Parameters + ---------- + **kwargs : + Arguments passed on to Parent class CondIndTest. + """ + # documentation + @property + def measure(self): + """ + Concrete property to return the measure of the independence test + """ + return self._measure + + def __init__(self, **kwargs): + self._measure = 'par_corr' + self.two_sided = True + self.residual_based = True + + CondIndTest.__init__(self, **kwargs) + + def _get_single_residuals(self, array, target_var, + standardize=True, + return_means=False): + """Returns residuals of linear multiple regression. + + Performs a OLS regression of the variable indexed by target_var on the + conditions Z. Here array is assumed to contain X and Y as the first two + rows with the remaining rows (if present) containing the conditions Z. + Optionally returns the estimated regression line. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + target_var : {0, 1} + Variable to regress out conditions from. + + standardize : bool, optional (default: True) + Whether to standardize the array beforehand. Must be used for + partial correlation. + + return_means : bool, optional (default: False) + Whether to return the estimated regression line. + + Returns + ------- + resid [, mean] : array-like + The residual of the regression and optionally the estimated line. + """ + + dim, T = array.shape + dim_z = dim - 2 + + # Standardize + if standardize: + array -= array.mean(axis=1).reshape(dim, 1) + array /= array.std(axis=1).reshape(dim, 1) + if np.isnan(array).sum() != 0: + raise ValueError("nans after standardizing, " + "possibly constant array!") + + y = array[target_var, :] + + if dim_z > 0: + z = np.fastCopyAndTranspose(array[2:, :]) + beta_hat = np.linalg.lstsq(z, y, rcond=None)[0] + mean = np.dot(z, beta_hat) + resid = y - mean + else: + resid = y + mean = None + + if return_means: + return (resid, mean) + return resid + +
[docs] def get_dependence_measure(self, array, xyz): + """Return partial correlation. + + Estimated as the Pearson correlation of the residuals of a linear + OLS regression. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + Returns + ------- + val : float + Partial correlation coefficient. + """ + + x_vals = self._get_single_residuals(array, target_var=0) + y_vals = self._get_single_residuals(array, target_var=1) + val, _ = stats.pearsonr(x_vals, y_vals) + return val
+ +
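A minimal sketch (not part of the module) cross-checking the residual-based partial correlation above on synthetic data in which X and Y are independent given Z. The import path is an assumption that may differ between tigramite versions; the array rows follow the X, Y, Z ordering expected by get_dependence_measure.

import numpy as np
from tigramite.independence_tests import ParCorr  # import path assumed

rng = np.random.default_rng(42)
T = 500
z = rng.standard_normal(T)
x = 0.8 * z + rng.standard_normal(T)
y = 0.7 * z + rng.standard_normal(T)

array = np.vstack([x, y, z])   # rows: X, Y, Z; columns: time samples
xyz = np.array([0, 1, 2])      # identifier array: 0 = X, 1 = Y, 2 = Z

parcorr = ParCorr(significance='analytic')
val = parcorr.get_dependence_measure(array, xyz)
pval = parcorr.get_analytic_significance(value=val, T=T, dim=array.shape[0])
print(val, pval)               # val close to 0: X is independent of Y given Z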
[docs] def get_shuffle_significance(self, array, xyz, value, + return_null_dist=False): + """Returns p-value for shuffle significance test. + + For residual-based test statistics only the residuals are shuffled. + + Parameters + ---------- + array : array-like + data array with X, Y, Z in rows and observations in columns + + xyz : array of ints + XYZ identifier array of shape (dim,). + + value : number + Value of test statistic for unshuffled estimate. + + Returns + ------- + pval : float + p-value + """ + + x_vals = self._get_single_residuals(array, target_var=0) + y_vals = self._get_single_residuals(array, target_var=1) + array_resid = np.array([x_vals, y_vals]) + xyz_resid = np.array([0, 1]) + + null_dist = self._get_shuffle_dist(array_resid, xyz_resid, + self.get_dependence_measure, + sig_samples=self.sig_samples, + sig_blocklength=self.sig_blocklength, + verbosity=self.verbosity) + + pval = (null_dist >= np.abs(value)).mean() + + # Adjust p-value for two-sided measures + if pval < 1.: + pval *= 2. + + if return_null_dist: + return pval, null_dist + return pval
+ +
[docs] def get_analytic_significance(self, value, T, dim): + """Returns analytic p-value from Student's t-test for the Pearson + correlation coefficient. + + Assumes two-sided correlation. If the degrees of freedom are less than + 1, numpy.nan is returned. + + Parameters + ---------- + value : float + Test statistic value. + + T : int + Sample length + + dim : int + Dimensionality, ie, number of features. + + Returns + ------- + pval : float or numpy.nan + P-value. + """ + # Get the number of degrees of freedom + deg_f = T - dim + + if deg_f < 1: + pval = np.nan + elif abs(abs(value) - 1.0) <= sys.float_info.min: + pval = 0.0 + else: + trafo_val = value * np.sqrt(deg_f/(1. - value*value)) + # Two sided significance level + pval = stats.t.sf(np.abs(trafo_val), deg_f) * 2 + + return pval
+ +
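A worked check of the transformation above (a sketch): a partial correlation of r = 0.2 estimated from T = 100 samples with dim = 3 (X, Y and one condition) is converted to a t-value with T - dim = 97 degrees of freedom, mirroring the formula in get_analytic_significance.

import numpy as np
from scipy import stats

r, T, dim = 0.2, 100, 3
deg_f = T - dim                               # 97 degrees of freedom
t_stat = r * np.sqrt(deg_f / (1. - r**2))     # about 2.01
pval = stats.t.sf(np.abs(t_stat), deg_f) * 2  # two-sided p-value, about 0.047
print(t_stat, pval)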
[docs] def get_analytic_confidence(self, value, df, conf_lev): + """Returns analytic confidence interval for correlation coefficient. + + Based on Student's t-distribution. + + Parameters + ---------- + value : float + Test statistic value. + + df : int + degrees of freedom of the test + + conf_lev : float + Confidence interval, eg, 0.9 + + Returns + ------- + (conf_lower, conf_upper) : Tuple of floats + Upper and lower confidence bound of confidence interval. + """ + # Confidence interval is two-sided + c_int = (1. - (1. - conf_lev) / 2.) + + value_tdist = value * np.sqrt(df) / np.sqrt(1. - value**2) + conf_lower = (stats.t.ppf(q=1. - c_int, df=df, loc=value_tdist) + / np.sqrt(df + stats.t.ppf(q=1. - c_int, df=df, + loc=value_tdist)**2)) + conf_upper = (stats.t.ppf(q=c_int, df=df, loc=value_tdist) + / np.sqrt(df + stats.t.ppf(q=c_int, df=df, + loc=value_tdist)**2)) + return (conf_lower, conf_upper)
+ + +
[docs] def get_model_selection_criterion(self, j, parents, tau_max=0, corrected_aic=False): + """Returns Akaike's Information criterion modulo constants. + + Fits a linear model of the parents to variable j and returns the + score. Leave-one-out cross-validation is asymptotically equivalent to + AIC for ordinary linear regression models. Here used to determine + optimal hyperparameters in PCMCI, in particular the pc_alpha value. + + Parameters + ---------- + j : int + Index of target variable in data array. + + parents : list + List of form [(0, -1), (3, -2), ...] containing parents. + + tau_max : int, optional (default: 0) + Maximum time lag. This may be used to make sure that estimates for + different lags in X, Z, all have the same sample size. + + corrected_aic : bool, optional (default: False) + Whether to add the small-sample correction term of the corrected + AIC (AICc) to the score. + + Returns + ------- + score : float + Model score. + """ + + Y = [(j, 0)] + X = [(j, 0)] # dummy variable here + Z = parents + array, xyz = self.dataframe.construct_array(X=X, Y=Y, Z=Z, + tau_max=tau_max, + mask_type=self.mask_type, + return_cleaned_xyz=False, + do_checks=True, + verbosity=self.verbosity) + + dim, T = array.shape + + y = self._get_single_residuals(array, target_var=1, return_means=False) + # Get RSS + rss = (y**2).sum() + # Number of parameters + p = dim - 1 + # Get AIC + if corrected_aic: + score = T * np.log(rss) + 2. * p + (2.*p**2 + 2.*p)/(T - p - 1) + else: + score = T * np.log(rss) + 2. * p + return score
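A sketch of how the score above can be used to compare candidate parent sets (assumptions: toy data, the same import path as in the example further above, and a dataframe attached via set_dataframe). Lower scores indicate a better penalized fit.

import numpy as np
from tigramite.data_processing import DataFrame
from tigramite.independence_tests import ParCorr  # import path assumed

rng = np.random.default_rng(1)
T = 500
data = rng.standard_normal((T, 2))
for t in range(1, T):
    data[t, 1] += 0.6 * data[t - 1, 0]   # X0_{t-1} drives X1_t

parcorr = ParCorr()
parcorr.set_dataframe(DataFrame(data))

# Compare the score of the true parent set against the empty set for variable 1
print(parcorr.get_model_selection_criterion(j=1, parents=[(0, -1)], tau_max=1))
print(parcorr.get_model_selection_criterion(j=1, parents=[], tau_max=1))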
+
+ +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/_build/html/_modules/tigramite/lpcmci.html b/docs/_build/html/_modules/tigramite/lpcmci.html new file mode 100644 index 00000000..518dfb74 --- /dev/null +++ b/docs/_build/html/_modules/tigramite/lpcmci.html @@ -0,0 +1,3499 @@ + + + + + + + tigramite.lpcmci — Tigramite 5.0 documentation + + + + + + + + + + + + + +
+
+
+
+ +

Source code for tigramite.lpcmci

+import numpy as np
+from itertools import product, combinations
+from copy import deepcopy
+
+
[docs]class LPCMCI(): + r""" + LPCMCI is an algorithm for causal discovery in large-scale times series that allows for latent confounders and learns lag-specific + causal relationships. + + The algorithm is introduced and explained in: + [1] Gerhardus, A. & Runge, J. High-recall causal discovery for autocorrelated time series with latent confounders Advances in Neural + Information Processing Systems, 2020, 33. https://proceedings.neurips.cc/paper/2020/hash/94e70705efae423efda1088614128d0b-Abstract.html + + NOTE: + This method is still EXPERIMENTAL since the default settings of hyperparameters are still being fine-tuned. We actually invite + feedback on which work best in applications and numerical experiments. + + The main function, which applies the algorithm, is 'run_lpcmci_experimental'. + + Parameters passed to the constructor: + - dataframe: + Tigramite dataframe object that contains the the time series dataset \bold{X} + - cond_ind_test: + A conditional independence test object that specifies which conditional independence test CI is to be used + - verbosity: + Controls the verbose output self.run_lpcmci_experimental() and the function it calls. + + Parameters passed to self.run_lpcmci_experimental(): + Note: The default values are still being tuned and some parameters might be removed in the future. + - selected_links: dict or None + Dictionary of the form {0: [(3, 0), (0, -1), ...], 1:[], ...} that specifys which links are potentially present. All other links + are assumed to be absent. If None is passed all links are potentially present. + - tau_min: + The assumed minimum time lag, i.e., links with a lag smaller than tau_min are assumed to be absent. + - tau_max: + The maximum considered time lag, i.e., the algorithm learns a DPAG on a time window [t-\taumax, t] with \tau_max + 1 time steps. + It is *not* assumed that in the underlying time series DAG there are no links with a lag larger than \tau_max. + - pc_alpha: + The significance level of conditional independence tests + - n_preliminary_iterations: + Determines the number of iterations in the preliminary phase of LPCMCI, corresponding to the 'k' in LPCMCI(k) in [1]. + - max_cond_px: + Consider a pair of variables (X^i_{t-\tau}, X^j_t) with \tau > 0. In Algorithm S2 in [1] (here this is + self._run_ancestral_removal_phase()), the algorithm does not test for conditional independence given subsets of + apds_t(X^i_{t-\tau}, X^j_t, C(G)) of cardinality higher than max_cond_px. In Algorithm S3 in [1] (here this is + self._run_non_ancestral_removal_phase()), the algorithm does not test for conditional independence given subsets of + napds_t(X^i_{t-\tau}, X^j_t, C(G)) of cardinality higher than max_cond_px. + - max_p_global: + Restricts all conditional independence tests to conditioning sets with cardinality smaller or equal to max_p_global + - max_p_non_ancestral: + Restricts all conditional independence tests in the second removal phase (here this is self._run_dsep_removal_phase()) to + conditioning sets with cardinality smaller or equal to max_p_global + - max_q_global: + For each ordered pair (X^i_{t-\tau}, X^j_t) of adjacent variables and for each cardinality of the conditioning sets test at most + max_q_global many conditioning sets (when summing over all tested cardinalities more than max_q_global tests may be made) + - max_pds_set: + In Algorithm S3 (here this is self._run_non_ancestral_removal_phase()), the algorithm tests for conditional independence given + subsets of the relevant napds_t sets. 
If for a given link the set napds_t(X^j_t, X^i_{t-\tau}, C(G)) has more than max_pds_set many + elements (or, if the link is also tested in the opposite directed, if napds_t(X^i_{t-\tau}, X^j_t, C(G)) has more than max_pds_set + elements), this link is not tested. + - prelim_with_collider_rules: + If True: As in pseudocode + If False: Line 22 of Algorithm S2 in [1] is replaced by line 18 of Algorithm S2 when Algorithm S2 is called from the preliminary + phase (not in the last application of Algorithm S2 directly before Algorithm S3 is applied) + - parents_of_lagged: + If True: As in pseudocode + If False: The default conditioning set is pa(X^j_t, C(G)) rather than pa({X^j_t, X^i_{t-\tau}, C(G)) for tau > 0 + - prelim_only: + If True, stop after the preliminary phase. Can be used for detailed performance analysis + - break_once_separated: + If True: As in pseudocode + If False: The break commands are removed from Algorithms S2 and S3 in in [1] + - no_non_ancestral_phase: + If True, do not execute Algorithm S3. Can be used for detailed performance analysis + - use_a_pds_t_for_majority: + If True: As in pseudocode + If False: The search for separating sets instructed by the majority rule is made given subsets adj(X^j_t, C(G)) rather than + subsets of apds_t(X^j_t, X^i_{t-\tau}, C(G)) + - orient_contemp: + If orient_contemp == 1: As in pseudocode of Algorithm S2 in [1] + If orient_contemp == 2: Also orient contemporaneous links in line 18 of Algorithm S2 + If orient_comtemp == 0: Also not orient contemporaneous links in line 22 of Algorithm S2 + - update_middle_marks: + If True: As in pseudoce of Algorithms S2 and S3 in [1] + If False: The MMR rule is not applied + - prelim_rules: + If prelim_rules == 1: As in pseudocode of Algorithm S2 in [1] + If prelim_rules == 0: Exclude rules R9^prime and R10^\prime from line 18 in Algorithm S2 + - fix_all_edges_before_final_orientation: + When one of max_p_global, max_p_non_ancestral, max_q_global or max_pds_set is not np.inf, the algorithm may terminate although not + all middle marks are empty. All orientation rules are nevertheless sound, since the rules always check for the appropriate middle + marks. If fix_all_edges_before_final_orientation is True, all middle marks are set to the empty middle mark by force, followed by + another application of the rules. + - auto_first: + If True: As in pseudcode of Algorithms S2 and S3 in [1] + If False: Autodependency links are not prioritized even before contemporaneous links + - remember_only_parents: + If True: As in pseudocode of Algorithm 1 + If False: If X^i_{t-\tau} has been marked as ancestor of X^j_t at any point of a preliminary iteration but the link between + X^i_{t-\tau} and X^j_t was removed later, the link is nevertheless initialized with a tail at X^i_{t-\tau} in the re-initialization + - no_apr: + If no_apr == 0: As in pseudcode of Algorithms S2 and S3 in [1] + If no_apr == 1: The APR is not applied by Algorithm S2, except in line 22 of its last call directly before the call of Algorithm S3 + If no_apr == 2: The APR is never applied + + Return value of self.run_lpcmci_experimental(): + graph : array of shape (N, N, tau_max+1) + Resulting DPAG, representing the learned causal relationships. + val_matrix : array of shape (N, N, tau_max+1) + Estimated matrix of test statistic values regarding adjacencies. + p_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of p-values regarding adjacencies. 
+ + A note on middle marks: + For convenience (to have strings of the same lengths) we here internally denote the empty middle mark by '-'. For post-processing + purposes all middle marks are set to the empty middle mark (here '-'). + + A note on wildcards: + The middle mark wildcard \ast and the edge mark wildcard are here represented as *, the edge mark wildcard \star as + + """ + + def __init__(self, dataframe, cond_ind_test, verbosity = 0): + """Class constructor. Store: + i) data + ii) conditional independence test object + iii) some instance attributes""" + + # Save the time series data that the algorithm operates on + self.dataframe = dataframe + + # Set the conditional independence test to be used + self.cond_ind_test = cond_ind_test + self.cond_ind_test.set_dataframe(self.dataframe) + + # Store the shape of the data in the T and N variables + self.T, self.N = self.dataframe.T, self.dataframe.N + + # Save verbosity + self.verbosity = verbosity + + +
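A small illustration of the 'selected_links' format described in the class docstring above (a sketch for N = 3 variables, tau_min = 0 and tau_max = 2): keys must be exactly 0, ..., N-1, lags must lie in {-tau_max, ..., -tau_min}, and contemporaneous (zero-lag) links must be listed symmetrically.

selected_links = {
    0: [(0, -1), (1, -1)],          # X0 may depend on its own past and on X1_{t-1}
    1: [(0, -1), (2, 0)],           # contemporaneous link between X1 and X2 ...
    2: [(1, 0), (2, -1), (2, -2)],  # ... must also appear here (symmetry)
}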
[docs] def run_lpcmci_experimental(self, + selected_links = None, + tau_min = 0, + tau_max = 1, + pc_alpha = 0.05, + n_preliminary_iterations = 1, + max_cond_px = 0, + max_p_global = np.inf, + max_p_non_ancestral = np.inf, + max_q_global = np.inf, + max_pds_set = np.inf, + prelim_with_collider_rules = True, + parents_of_lagged = True, + prelim_only = False, + break_once_separated = True, + no_non_ancestral_phase = False, + use_a_pds_t_for_majority = True, + orient_contemp = 1, + update_middle_marks = True, + prelim_rules = 1, + fix_all_edges_before_final_orientation = True, + auto_first = True, + remember_only_parents = True, + no_apr = 0): + """Run LPCMCI on the dataset and with the conditional independence test passed to the class constructor and with the + options passed to this function.""" + + ####################################################################################################################### + ####################################################################################################################### + # Step 0: Initializations + self._initialize(selected_links, tau_min, tau_max, pc_alpha, n_preliminary_iterations, max_cond_px, max_p_global, + max_p_non_ancestral, max_q_global, max_pds_set, prelim_with_collider_rules, parents_of_lagged, prelim_only, + break_once_separated, no_non_ancestral_phase, use_a_pds_t_for_majority, orient_contemp, update_middle_marks, + prelim_rules, fix_all_edges_before_final_orientation, auto_first, remember_only_parents, no_apr) + + ####################################################################################################################### + ####################################################################################################################### + # Step 1: Preliminary phases + for i in range(self.n_preliminary_iterations): + + # Verbose output + if self.verbosity >= 1: + print("\n=======================================================") + print("=======================================================") + print("Starting preliminary phase {:2}".format(i + 1)) + + # In the preliminary phases, auto-lag links are tested with first priority. Among the auto-lag links, different lags are + # not distinguished. 
All other links have lower priority, among which those which shorter lags have higher priority + self._run_ancestral_removal_phase(prelim = True) + + # Verbose output + if self.verbosity >= 1: + print("\nPreliminary phase {:2} complete".format(i + 1)) + print("\nGraph:\n--------------------------------") + self._print_graph_dict() + print("--------------------------------") + + # When the option self.prelim_only is chosen, do not re-initialize in the last iteration + if i == self.n_preliminary_iterations - 1 and self.prelim_only: + break + + # Remember ancestorships, re-initialize and re-apply the remembered ancestorships + def_ancs = self.def_ancs + + if self.remember_only_parents: + smaller_def_ancs = dict() + for j in range(self.N): + smaller_def_ancs[j] = {(i, lag_i) for (i, lag_i) in def_ancs[j] if self._get_link((i, lag_i), (j, 0)) != ""} + def_ancs = smaller_def_ancs + + self._initialize_run_memory() + self._apply_new_ancestral_information(None, def_ancs) + + ####################################################################################################################### + ####################################################################################################################### + # Step 2: Full ancestral phase + if not self.prelim_only: + + # Verbose output + if self.verbosity >= 1: + print("\n=======================================================") + print("=======================================================") + print("Starting final ancestral phase") + + # In the standard ancestral phase, links are prioritized in the same as in the preliminary phases + self._run_ancestral_removal_phase() + + # Verbose output + if self.verbosity >= 1: + print("\nFinal ancestral phase complete") + print("\nGraph:\n--------------------------------") + self._print_graph_dict() + print("--------------------------------") + + ####################################################################################################################### + ####################################################################################################################### + # Step 3: Non-ancestral phase + if (not self.prelim_only) and (not self.no_non_ancestral_phase): + + # Verbose output + if self.verbosity >= 1: + print("\n=======================================================") + print("=======================================================") + print("Starting non-ancestral phase") + + # In the non-ancestral phase, large lags are prioritized + self._run_non_ancestral_removal_phase() + + # Verbose output + if self.verbosity >= 1: + print("\nNon-ancestral phase complete") + print("\nGraph:\n--------------------------------") + self._print_graph_dict() + print("--------------------------------") + + if self.fix_all_edges_before_final_orientation: + + # Verbose output + if self.verbosity >= 1: + print("\n=======================================================") + print("=======================================================") + print("Final rule application phase") + print("\nSetting all middle marks to '-'") + + self._fix_all_edges() + self._run_orientation_phase(rule_list = self._rules_all, only_lagged = False) + + ####################################################################################################################### + ####################################################################################################################### + + # Verbose output + if self.verbosity >= 1: + print("\n=======================================================") + 
print("=======================================================") + print("\nLPCMCI has converged") + print("\nFinal graph:\n--------------------------------") + print("--------------------------------") + self._print_graph_dict() + print("--------------------------------") + print("--------------------------------\n") + + print("Max search set: {}".format(self.max_na_search_set_found)) + print("Max na-pds set: {}\n".format(self.max_na_pds_set_found)) + + # Post processing + self._fix_all_edges() + self.graph = self._dict2graph() + self.pval_max_matrix = self._dict_to_matrix(self.pval_max, self.tau_max, self.N, default = 0) + self.val_min_matrix = self._dict_to_matrix(self.pval_max_val, self.tau_max, self.N, default = 0) + self.cardinality_matrix = self._dict_to_matrix(self.pval_max_card, self.tau_max, self.N, default = 0) + + # Build and return the return dictionariy + return_dict = {"graph": self.graph, + "p_matrix": self.pval_max_matrix, + "val_matrix": self.val_min_matrix} + return return_dict
+ + + def _initialize(self, selected_links, tau_min, tau_max, pc_alpha, n_preliminary_iterations, max_cond_px, max_p_global, + max_p_non_ancestral, max_q_global, max_pds_set, prelim_with_collider_rules, parents_of_lagged, prelim_only, + break_once_separated, no_non_ancestral_phase, use_a_pds_t_for_majority, orient_contemp, update_middle_marks, prelim_rules, + fix_all_edges_before_final_orientation, auto_first, remember_only_parents, no_apr): + """Function for + i) saving the arguments passed to self.run_lpcmci_experimental() as instance attributes + ii) initializing various memory variables for storing the current graph, sepsets etc. + """ + + # Save the arguments passed to self.run_lpcmci_experimental() + self.selected_links = selected_links + self.tau_min = tau_min + self.tau_max = tau_max + self.pc_alpha = pc_alpha + self.n_preliminary_iterations = n_preliminary_iterations + self.max_cond_px = max_cond_px + self.max_p_global = max_p_global + self.max_p_non_ancestral = max_p_non_ancestral + self.max_q_global = max_q_global + self.max_pds_set = max_pds_set + self.prelim_with_collider_rules = prelim_with_collider_rules + self.parents_of_lagged = parents_of_lagged + self.prelim_only = prelim_only + self.break_once_separated = break_once_separated + self.no_non_ancestral_phase = no_non_ancestral_phase + self.use_a_pds_t_for_majority = use_a_pds_t_for_majority + self.orient_contemp = orient_contemp + self.update_middle_marks = update_middle_marks + self.prelim_rules = prelim_rules + self.fix_all_edges_before_final_orientation = fix_all_edges_before_final_orientation + self.auto_first = auto_first + self.remember_only_parents = remember_only_parents + self.no_apr = no_apr + + # Check that validity of tau_min and tau_max + self._check_tau_min_tau_max() + + # Check the validity of 'selected_links' + self._check_and_set_selected_links() + + # Rules to be executed at the end of a preliminary phase + self._rules_prelim_final= [["APR"], ["ER-08"], ["ER-02"], ["ER-01"], ["ER-09"], ["ER-10"]] + + # Rules to be executed within the while loop of a preliminary phase + self._rules_prelim = [["APR"], ["ER-08"], ["ER-02"], ["ER-01"]] if self.prelim_rules == 0 else self._rules_prelim_final + + # Full list of all rules + self._rules_all = [["APR"], ["ER-08"], ["ER-02"], ["ER-01"], ["ER-00-d"], ["ER-00-c"], ["ER-03"], ["R-04"], ["ER-09"], ["ER-10"], ["ER-00-b"], ["ER-00-a"]] + + # Initialize various memory variables for storing the current graph, sepsets etc. 
+ self._initialize_run_memory() + + # Return + return True + + def _check_tau_min_tau_max(self): + """Check whether the choice of tau_min and tau_max is valid.""" + + if not 0 <= self.tau_min <= self.tau_max: + raise ValueError("tau_min = {}, ".format(self.tau_min) + \ + "tau_max = {}, ".format(self.tau_max) + \ + "but 0 <= tau_min <= tau_max required.") + + def _check_and_set_selected_links(self): + """If 'selected_links' is given check its validity, else set all links as selected.""" + + if self.selected_links is not None: + + # Check validity of keys + if set(self.selected_links.keys()) == set(range(self.N)): + + # Check validity of entries + var_allowed = set(range(self.N)) + var_entries = set(var for parents in self.selected_links.values() for var, _ in parents) + + lag_allowed = set (range(-self.tau_max, -self.tau_min + 1)) + lag_entries = set(lag for parents in self.selected_links.values() for _, lag in parents) + + if var_entries.issubset(var_allowed) and lag_entries.issubset(lag_allowed): + + # Check symmetry of lag-zero links + lag_zero = set((i, j) for j in range(self.N) for (i, lag) in self.selected_links[j] if lag == 0) + + check_symmetry = np.zeros((self.N, self.N)) + for (i, j) in lag_zero: + check_symmetry[i, j] = 1 + + if np.sum(np.transpose(check_symmetry) == check_symmetry) != self.N**2: + raise ValueError("Invalid 'selected_links': Zero-lag links must be symmetric.") + + else: + raise ValueError("Invalid link in at least one entry of 'selected_links'. Must be of the form (i, lag_i), where i in {0, 1, ..., N-1} and lag_i in {-tau_max, ..., -tau_min}.") + else: + raise ValueError("'selected_links' must be dictionary whose keys are exactly 0, 1, ..., N-1, where N is the number of component time series.") + + else: + self.selected_links = {j: [(i, -tau) for i in range(self.N) for tau in range(self.tau_min, self.tau_max + 1) if (tau > 0 or j != i)] for j in range(self.N)} + + def _initialize_run_memory(self): + """Function for initializing various memory variables for storing the current graph, sepsets etc.""" + + # Initialize the nested dictionary for storing the current graph. + # Syntax: self.graph_dict[j][(i, -tau)] gives the string representing the link from X^i_{t-tau} to X^j_t + self.graph_dict = {} + for j in range(self.N): + + self.graph_dict[j] = {(i, 0): "o?o" for i in range(self.N) if j != i} + + if self.max_cond_px == 0 and self.update_middle_marks: + self.graph_dict[j].update({(i, -tau): "oL>" for i in range(self.N) for tau in range(1, self.tau_max + 1)}) + else: + self.graph_dict[j].update({(i, -tau): "o?>" for i in range(self.N) for tau in range(1, self.tau_max + 1)}) + + # Initialize the nested dictionary for storing separating sets + # Syntax: self.sepsets[j][(i, -tau)] stores separating sets of X^i_{t-tau} to X^j_t. For tau = 0, i < j. + self.sepsets = {j: {(i, -tau): set() for i in range(self.N) for tau in range(self.tau_max + 1) if (tau > 0 or i < j)} for j in range(self.N)} + + # Initialize dictionaries for storing known ancestorships, non-ancestorships, and ambiguous ancestorships + # Syntax: self.def_ancs[j] contains the set of all known ancestors of X^j_t. 
Equivalently for the others + self.def_ancs = {j: set() for j in range(self.N)} + self.def_non_ancs = {j: set() for j in range(self.N)} + self.ambiguous_ancestorships = {j: set() for j in range(self.N)} + + # Initialize nested dictionaries for saving the maximal p-value among all conditional independence tests of a given + # pair of variables as well as the corresponding test statistic values and conditioning set cardinalities + # Syntax: As for self.sepsets + self.pval_max = {j: {(i, -tau): -np.inf for i in range(self.N) for tau in range(self.tau_max + 1) if (tau > 0 or i < j)} for j in range(self.N)} + self.pval_max_val = {j: {(i, -tau): np.inf for i in range(self.N) for tau in range(self.tau_max + 1) if (tau > 0 or i < j)} for j in range(self.N)} + self.pval_max_card = {j: {(i, -tau): -np.inf for i in range(self.N) for tau in range(self.tau_max + 1) if (tau > 0 or i < j)} for j in range(self.N)} + # Initialize a nested dictionary for caching na-pds-sets + # Syntax: self._na_pds_t[(i, t_i)][(j, t_j)] stores na_pds_t((i, t_i), (j, t_j)) + self._na_pds_t = {(j, -tau_j): {} for j in range(self.N) for tau_j in range(self.tau_max + 1)} + + # Initialize a variable for remembering the maximal cardinality among all calculated na-pds-sets, as well as the + # maximial cardinality of any search set in the non-ancestral phase + self.max_na_search_set_found = -1 + self.max_na_pds_set_found = -1 + + # Apply the restriction imposed by tau_min + self._apply_tau_min_restriction() + + # Apply the restriction imposed by selected_links + self._apply_selected_links_restriction() + + # Return + return True + + def _apply_tau_min_restriction(self): + """Apply the restrictions imposed by a non-zero tau_min: + - Remove all links of lag smaller than tau_min from self.graph_dict + - Set the corresponding entries in self.pval_max, self.pval_max_val, and self.pval_max_card to None + """ + + for (i, j, tau) in product(range(self.N), range(self.N), range(0, self.tau_min)): + if tau > 0 or j != i: + self.graph_dict[j][(i, -tau)] = "" + + if tau > 0 or i < j: + self.pval_max[j][(i, -tau)] = np.inf + self.pval_max_val[j][(i, -tau)] = -np.inf + self.pval_max_card[j][(i, -tau)] = np.inf + + def _apply_selected_links_restriction(self): + """Apply the restrictions imposed by selected_links: + - Remove all links that have not been selected + - Set the corresponding entries in self.pval_max, self.pval_max_val, and self.pval_max_card to None + """ + + for (i, j, tau) in product(range(self.N), range(self.N), range(self.tau_min, self.tau_max + 1)): + if (tau > 0 or j != i) and (i, -tau) not in self.selected_links[j]: + self.graph_dict[j][(i, -tau)] = "" + + if (tau > 0 or i < j) and (i, -tau) not in self.selected_links[j]: + self.pval_max[j][(i, -tau)] = np.inf + self.pval_max_val[j][(i, -tau)] = -np.inf + self.pval_max_card[j][(i, -tau)] = np.inf + + def _run_ancestral_removal_phase(self, prelim = False): + """Run an ancestral edge removal phase, this is Algorithm S2""" + + # Iterate until convergence + # p_pc is the cardinality of the non-default part of the conditioning sets. 
The full conditioning sets may have + # higher cardinality due to default conditioning on known parents + p_pc = 0 + while_broken = False + while True: + + ########################################################################################################## + ### Run the next removal iteration ####################################################################### + + # Force-quit while loop when p_pc exceeds the limit put by self.max_p_global + if p_pc > self.max_p_global: + while_broken = True + break + + # Verbose output + if self.verbosity >= 1: + if p_pc == 0: + print("\nStarting test phase\n") + print("p = {}".format(p_pc)) + + # Variables to memorize the occurence and absence of certain events in the below edge removal phase + has_converged = True + any_removal = False + + # Generate the prioritized link list + if self.auto_first: + + link_list = [product(range(self.N), range(-self.tau_max, 0))] + link_list = link_list + [product(range(self.N), range(self.N), range(-lag, -lag + 1)) for lag in range(0, self.tau_max + 1)] + + else: + + link_list = [product(range(self.N), range(self.N), range(-lag, -lag + 1)) for lag in range(0, self.tau_max + 1)] + + + # Run through all elements of link_list. Each element of link_list specifies ordered pairs of variables whose + # connecting edges are then subjected to conditional independence tests + for links in link_list: + + # Memory variables for storing edges that are marked for removal + to_remove = {j: {} for j in range(self.N)} + + # Iterate through all edges specified by links. Note that since the variables paris are ordered, (A, B) and (B, A) + # are seen as different pairs. + for pair in links: + + # Decode the elements of links into pairs of variables (X, Y) + if len(pair) == 2: + X = (pair[0], pair[1]) + Y = (pair[0], 0) + else: + X = (pair[0], pair[2]) + Y = (pair[1], 0) + + # Do not test auto-links twice + if self.auto_first and X[0] == Y[0]: + continue + + ###################################################################################################### + ### Exclusion of links ############################################################################### + + # Exclude the current link if ... + # ... X = Y + if X[1] == 0 and X[0] == Y[0]: + continue + # ... X > Y + if self._is_smaller(Y, X): + continue + + # Get the current link + link = self._get_link(X, Y) + + # Moreover exclude the current link if ... + # ... X and Y are not adjacent anymore + if link == "": + continue + # ... the link is definitely part of G + if link[1] == "-": + continue + + ###################################################################################################### + ### Determine which tests the link will be subjected to ########################################### + + # Depending on the middle mark on the link between X and Y as well as on some global options, we may not need + # to search for separating set among the potential parents of Y and/or X. 
+ test_Y = True if link[1] not in ["R", "!"] else False + test_X = True if (link[1] not in ["L", "!"] and (X[1] == 0 or (self.max_cond_px > 0 and self.max_cond_px >= p_pc))) else False + + ###################################################################################################### + ### Preparation PC search set and default conditioning set ########################################### + + if test_Y: + S_default_YX, S_search_YX = self._get_default_and_search_sets(Y, X, "ancestral") + + if test_X: + S_default_XY, S_search_XY = self._get_default_and_search_sets(X, Y, "ancestral") + + ###################################################################################################### + ### Middle mark updates ############################################################################## + + any_middle_mark_update = False + + # Note: Updating the middle marks here, within the for-loop, does not spoil order independence. In fact, this + # update does not influence the flow of the for-loop at all + if test_Y: + if len(S_search_YX) < p_pc: + # Note that X is smaller than Y. If S_search_YX exists and has fewer than p elements, X and Y are not + # d-separated by S \subset Par(Y). Therefore, the middle mark on the edge between X and Y can be updated + # with 'R' + self._apply_middle_mark(X, Y, "R") + else: + # Since S_search_YX exists and has hat least p_pc elements, the link between X and Y will be subjected to + # conditional independenc tests. Therefore, the algorithm has not converged yet. + has_converged = False + + if test_X: + if len(S_search_XY) < p_pc: + # Note that X is smaller than Y. If S_search_XY exists and has fewer than p elements, X and Y are not + # d-separated by S \subset Par(X). Therefore, the middle mark on the edge between X and Y can be updated + # with 'L' + self._apply_middle_mark(X, Y, "L") + else: + # Since S_search_YX exists and has hat least p_pc elements, the link between X and Y will be subjected to + # conditional independenc tests. Therefore, the algorithm has not converged yet. + has_converged = False + + ###################################################################################################### + + ###################################################################################################### + ### Tests for conditional independence ############################################################### + + # If option self.break_once_separated is True, the below for-loops will be broken immediately once a separating set + # has been found. In conjunction with the modified majority rule employed for orienting links, order independence + # (with respect to the index 'i' on X^i_t) then requires that the tested conditioning sets are ordered in an order + # independent way. Here, the minimal effect size of previous conditional independence tests serve as an order + # independent order criterion. 
+ if self.break_once_separated or not np.isinf(self.max_q_global): + if test_Y: + S_search_YX = self._sort_search_set(S_search_YX, Y) + if test_X: + S_search_XY = self._sort_search_set(S_search_XY, X) + + # Run through all cardinality p_pc subsets of S_search_YX + if test_Y: + + q_count = 0 + for S_pc in combinations(S_search_YX, p_pc): + + q_count = q_count + 1 + if q_count > self.max_q_global: + break + + # Build the full conditioning set + Z = set(S_pc) + Z = Z.union(S_default_YX) + + # Test conditional independence of X and Y given Z + val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max) + + if self.verbosity >= 2: + print("ANC(Y): %s _|_ %s | S_def = %s, S_pc = %s: val = %.2f / pval = % .4f" % + (X, Y, ' '.join([str(z) for z in S_default_YX]), ' '.join([str(z) for z in S_pc]), val, pval)) + + # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic + # values and conditioning set cardinalities + self._update_pval_val_card_dicts(X, Y, pval, val, len(Z)) + + # Check whether test result was significant + if pval > self.pc_alpha: + + # Mark the edge from X to Y for removal and save sepset + to_remove[Y[0]][X] = True + self._save_sepset(X, Y, (frozenset(Z), "wm")) + + # Verbose output + if self.verbosity >= 1: + print("({},{:2}) {:11} {} given {} union {}".format(X[0], X[1], "independent", Y, S_pc, S_default_YX)) + + if self.break_once_separated: + break + + # Run through all cardinality p_pc subsets of S_search_XY + if test_X: + + q_count = 0 + for S_pc in combinations(S_search_XY, p_pc): + + q_count = q_count + 1 + if q_count > self.max_q_global: + break + + # Build the full conditioning set + Z = set(S_pc) + Z = Z.union(S_default_XY) + + # Test conditional independence of X and Y given Z + val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max) + + if self.verbosity >= 2: + print("ANC(X): %s _|_ %s | S_def = %s, S_pc = %s: val = %.2f / pval = % .4f" % + (X, Y, ' '.join([str(z) for z in S_default_XY]), ' '.join([str(z) for z in S_pc]), val, pval)) + + # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic + # values and conditioning set cardinalities + self._update_pval_val_card_dicts(X, Y, pval, val, len(Z)) + + # Check whether test result was significant + if pval > self.pc_alpha: + + # Mark the edge from X to Y for removal and save sepset + to_remove[Y[0]][X] = True + self._save_sepset(X, Y, (frozenset(Z), "wm")) + + # Verbose output + if self.verbosity >= 1: + print("({},{:2}) {:11} {} given {} union {}".format(X[0], X[1], "independent", Y, S_pc, S_default_XY)) + + if self.break_once_separated: + break + + # for pair in links + + ########################################################################################################## + ### Remove edges marked for removal in to_remove ######################################################### + + # Run through all of the nested dictionary + for j in range(self.N): + for (i, lag_i) in to_remove[j].keys(): + + # Remember that at least one edge has been removed, remove the edge + any_removal = True + self._write_link((i, lag_i), (j, 0), "", verbosity = self.verbosity) + + # end for links in link_list + + # Verbose output + if self.verbosity >= 1: + print("\nTest phase complete") + + ############################################################################################################## + ### Orientations and next step 
############################################################################### + + if any_removal: + # At least one edge was removed or at least one middle mark has been updated. Therefore: i) apply the restricted set of + # orientation rules, ii) restart the while loop at p_pc = 0, unless all edges have converged, then break the while loop + + only_lagged = False if self.orient_contemp == 2 else True + any_update = self._run_orientation_phase(rule_list = self._rules_prelim, only_lagged = only_lagged) + + # If the orientation phase made a non-trivial update, then restart the while loop. Else increase p_pc by one + if any_update: + if self.max_cond_px == 0 and self.update_middle_marks: + self._update_middle_marks() + p_pc = 0 + + else: + p_pc = p_pc + 1 + + else: + # The graph has not changed at all in this iteration of the while loop. Therefore, if all edges have converged, break the + # while loop. If at least one edge has not yet converged, increase p_pc by one. + + if has_converged: + break + else: + p_pc = p_pc + 1 + + # end while True + + ################################################################################################################## + ### Consistency test and middle mark update ###################################################################### + + # Run through the entire graph + for j in range(self.N): + for (i, lag_i) in self.graph_dict[j].keys(): + + X = (i, lag_i) + Y = (j, 0) + + if self._is_smaller(Y, X): + continue + + # Consider only those links that are still part G + link = self._get_link((i, lag_i), (j, 0)) + if len(link) > 0: + + # Consistency check + if not while_broken: + assert link[1] != "?" + assert link[1] != "L" + assert ((link[1] != "R") or (lag_i < 0 and (self.max_cond_px > 0 or not self.update_middle_marks)) + or (self.no_apr != 0)) + + + # Update all middle marks to '!' + if link[1] not in ["-", "!"]: + self._write_link((i, lag_i), (j, 0), link[0] + "!" + link[2]) + + + ################################################################################################################## + ### Final rule applications ###################################################################################### + + if not prelim or self.prelim_with_collider_rules: + + if not prelim: + self.no_apr = self.no_apr - 1 + + any_update = self._run_orientation_phase(rule_list = self._rules_all, only_lagged = False) + + if self.max_cond_px == 0 and self.update_middle_marks and any_update: + self._update_middle_marks() + + else: + + only_lagged = False if self.orient_contemp >= 1 else True + any_update = self._run_orientation_phase(rule_list = self._rules_prelim_final, only_lagged = only_lagged) + + if self.max_cond_px == 0 and self.update_middle_marks and any_update: + self._update_middle_marks() + + # Return + return True + + + def _run_non_ancestral_removal_phase(self): + """Run the non-ancestral edge removal phase, this is Algorithm S3""" + + # Update of middle marks + self._update_middle_marks() + + # This function initializeds self._graph_full_dict, a nested dictionary representing the graph including links that are + # forward in time. This will make the calculcation of na-pds-t sets easier. + self._initialize_full_graph() + + # Iterate until convergence. Here, p_pc is the cardinality of the non-default part of the conditioning sets. 
The full + # conditioning sets may have higher cardinality due to default conditioning on known parents + p_pc = 0 + while True: + + ########################################################################################################## + ### Run the next removal iteration ####################################################################### + + # Force-quit while loop when p_pc exceeds the limit put by self.max_p_global or self.max_p_non_ancestral + if p_pc > self.max_p_global or p_pc > self.max_p_non_ancestral: + break + + # Verbose output + if self.verbosity >= 1: + if p_pc == 0: + print("\nStarting test phase\n") + print("p = {}".format(p_pc)) + + # Variables to memorize the occurence and absence of certain events in the below edge removal phase + has_converged = True + any_removal = False + + # Generate the prioritized link list + if self.auto_first: + + link_list = [product(range(self.N), range(-self.tau_max, 0))] + link_list = link_list + [product(range(self.N), range(self.N), range(-lag, -lag + 1)) for lag in range(0, self.tau_max + 1)] + + else: + + link_list = [product(range(self.N), range(self.N), range(-lag, -lag + 1)) for lag in range(0, self.tau_max + 1)] + + + # Run through all elements of link_list. Each element of link_list specifies ordered pairs of variables whose connecting + # edges are then subjected to conditional independence tests + for links in link_list: + + # Memory variables for storing edges that are marked for removal + to_remove = {j: {} for j in range(self.N)} + + # Iterate through all edges specified by links. Note that since the variables paris are ordered, (A, B) and (B, A) are + # seen as different pairs. + for pair in links: + + if len(pair) == 2: + X = (pair[0], pair[1]) + Y = (pair[0], 0) + else: + X = (pair[0], pair[2]) + Y = (pair[1], 0) + + # Do not test auto-links twice + if self.auto_first and X[0] == Y[0]: + continue + + ###################################################################################################### + ### Exclusion of links ############################################################################### + + # Exclude the current link if ... + # ... X = Y + if X[1] == 0 and X[0] == Y[0]: + continue + # ... X > Y + if self._is_smaller(Y, X): + continue + + # Get the current link + link = self._get_link(X, Y) + + # Exclude the current link if ... + if link == "": + continue + # ... the link is definitely part of G + if link[1] == "-": + continue + + ###################################################################################################### + ### Determine which tests the link will be subjected to ############################################# + + # The algorithm always searches for separating sets in na-pds-t(Y, X). 
Depending on whether X and Y are
+ # contemporaneous and on some global options, the algorithm may also search for separating sets in na-pds-t(X, Y)
+ test_X = True if (X[1] == 0 or (self.max_cond_px > 0 and self.max_cond_px >= p_pc)) else False
+
+ ######################################################################################################
+ ### Preparation of default conditioning sets and PC search sets ######################################
+
+ # Verbose output
+ if self.verbosity >= 2:
+ print("_get_na_pds_t ")
+
+ S_default_YX, S_search_YX = self._get_default_and_search_sets(Y, X, "non-ancestral")
+
+ self.max_na_search_set_found = max(self.max_na_search_set_found, len(S_search_YX))
+
+ if test_X:
+ S_default_XY, S_search_XY = self._get_default_and_search_sets(X, Y, "non-ancestral")
+
+ self.max_na_search_set_found = max(self.max_na_search_set_found, len(S_search_XY))
+
+ # If the search set exceeds the specified bounds, do not test this link
+ if len(S_search_YX) > self.max_pds_set or (test_X and len(S_search_XY) > self.max_pds_set):
+ continue
+
+ ######################################################################################################
+
+ ######################################################################################################
+ ### Middle mark updates ##############################################################################
+
+ # Note: Updating the middle marks here, within the for-loop, does not spoil order independence. In fact, this
+ # update does not influence the flow of the for-loop at all
+ if len(S_search_YX) < p_pc or (test_X and len(S_search_XY) < p_pc):
+ # Mark the link from X to Y as converged, remember the fixation, then continue
+ self._write_link(X, Y, link[0] + "-" + link[2], verbosity = self.verbosity)
+ continue
+
+ else:
+ has_converged = False
+
+
+ ######################################################################################################
+ ### Tests for conditional independence ###############################################################
+
+ # If option self.break_once_separated is True, the below for-loops will be broken immediately once a separating set
+ # has been found. In conjunction with the modified majority rule employed for orienting links, order independence
+ # (with respect to the index 'i' on X^i_t) then requires that the tested conditioning sets are ordered in an order
+ # independent way. Here, the minimal effect size of previous conditional independence tests serves as an order
+ # independent order criterion.
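One way to realize such an order-independent sorting criterion is to rank candidates by the minimal absolute test statistic recorded for them so far, breaking ties by the node label. This is only a sketch under that assumption; min_abs_val is a hypothetical dictionary and not an attribute defined in this patch, and the actual criterion is whatever self._sort_search_set implements.

    def sort_search_set(search_set, min_abs_val):
        # Larger minimal effect sizes are tested first; ties are broken by the node
        # tuple itself, so the result never depends on set-iteration order.
        return sorted(search_set, key=lambda node: (-min_abs_val.get(node, 0.0), node))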
+ if self.break_once_separated or not np.isinf(self.max_q_global): + S_search_YX = self._sort_search_set(S_search_YX, Y) + if test_X: + S_search_XY = self._sort_search_set(S_search_XY, X) + + # Verbose output + if self.verbosity >= 2: + print("for S_pc in combinations(S_search_YX, p_pc)") + + # Run through all cardinality p_pc subsets of S_search_YX + q_count = 0 + for S_pc in combinations(S_search_YX, p_pc): + + q_count = q_count + 1 + if q_count > self.max_q_global: + break + + # Build the full conditioning set + Z = set(S_pc) + Z = Z.union(S_default_YX) + + # Test conditional independence of X and Y given Z + val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max) + + if self.verbosity >= 2: + print("Non-ANC(Y): %s _|_ %s | S_def = %s, S_pc = %s: val = %.2f / pval = % .4f" % + (X, Y, ' '.join([str(z) for z in S_default_YX]), ' '.join([str(z) for z in S_pc]), val, pval)) + + # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic + # values and conditioning set cardinalities + self._update_pval_val_card_dicts(X, Y, pval, val, len(Z)) + + # Check whether test result was significant + if pval > self.pc_alpha: + + # Mark the edge from X to Y for removal and save sepset + to_remove[Y[0]][X] = True + self._save_sepset(X, Y, (frozenset(Z), "wm")) + + # Verbose output + if self.verbosity >= 1: + print("({},{:2}) {:11} {} given {} union {}".format(X[0], X[1], "independent", Y, S_pc, S_default_YX)) + + if self.break_once_separated: + break + + if test_X: + + # Verbose output + if self.verbosity >= 2: + print("for S_pc in combinations(S_search_XY, p_pc)") + + # Run through all cardinality p_pc subsets of S_search_XY + q_count = 0 + for S_pc in combinations(S_search_XY, p_pc): + + q_count = q_count + 1 + if q_count > self.max_q_global: + break + + # Build the full conditioning set + Z = set(S_pc) + Z = Z.union(S_default_XY) + + # Test conditional independence of X and Y given Z + val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max) + + if self.verbosity >= 2: + print("Non-ANC(X): %s _|_ %s | S_def = %s, S_pc = %s: val = %.2f / pval = % .4f" % + (X, Y, ' '.join([str(z) for z in S_default_XY]), ' '.join([str(z) for z in S_pc]), val, pval)) + + # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic + # values and conditioning set cardinalities + self._update_pval_val_card_dicts(X, Y, pval, val, len(Z)) + + # Check whether test result was significant + if pval > self.pc_alpha: + + # Mark the edge from X to Y for removal and save sepset + to_remove[Y[0]][X] = True + self._save_sepset(X, Y, (frozenset(Z), "wm")) + + # Verbose output + if self.verbosity >= 1: + print("({},{:2}) {:11} {} given {} union {}".format(X[0], X[1], "independent", Y, S_pc, S_default_YX)) + + if self.break_once_separated: + break + + # end for links in link_list + + ########################################################################################################## + ### Remove edges marked for removal in to_remove ######################################################### + + # Check whether there is any removal at all + any_removal_this = False + + # Run through all of the nested dictionary + for j in range(self.N): + for (i, lag_i) in to_remove[j].keys(): + + # Remember that at least one edge has been removed, remove the edge + any_removal = True + any_removal_this = True + self._write_link((i, lag_i), (j, 0), "", verbosity = 
self.verbosity) + + # If any_removal_this = True, we need to recalculate full graph dict + if any_removal_this: + self._initialize_full_graph() + self._na_pds_t = {(j, -tau_j): {} for j in range(self.N) for tau_j in range(self.tau_max + 1)} + + + # end for links in link_list + + # Verbose output + if self.verbosity >= 1: + print("\nTest phase complete") + + ############################################################################################################## + ### Orientations and next step ############################################################################### + + if any_removal: + # At least one edge was removed or at least one middle mark has been updated. Therefore: i) apply the full set of + # orientation rules, ii) restart the while loop at p_pc = 0, unless all edges have converged, then break the while loop + + any_update = self._run_orientation_phase(rule_list = self._rules_all, only_lagged = False) + + if any_update: + self._initialize_full_graph() + self._na_pds_t = {(j, -tau_j): {} for j in range(self.N) for tau_j in range(self.tau_max + 1)} + p_pc = 0 + + else: + p_pc = p_pc + 1 + + else: + # The graph has not changed at all in this iteration of the while loop. Therefore, if all edges have converged, break + # the while loop. If at least one edge has not yet converged, increase p_pc by one. + + if has_converged: + break + else: + p_pc = p_pc + 1 + + # end while True + + ################################################################################################################## + ### Final rule applications ###################################################################################### + + self._run_orientation_phase(rule_list = self._rules_all, only_lagged = False) + + # Return + return True + + + def _run_orientation_phase(self, rule_list, only_lagged = False): + """Exhaustively apply the rules specified by rule_list, this is Algorithm S4""" + + # Verbose output + if self.verbosity >= 1: + print("\nStarting orientation phase") + print("with rule list: ", rule_list) + + # Remember whether this call to _run_orientation_phase has made any update to G + restarted_once = False + + # Run through all priority levels of rule_list + idx = 0 + while idx <= len(rule_list) - 1: + + # Some rule require self._graph_full_dict. Therefore, it is initialized once the while loop (re)-starts at the first + # prioprity level + if idx == 0: + self._initialize_full_graph() + + # Remember whether G will be updated with new useful information ('x' marks are considered not useful) + restart = False + + ########################################################################################################### + ### Rule application ###################################################################################### + + # Get the current rules + current_rules = rule_list[idx] + + # Prepare a list to remember marked orientations + to_orient = [] + + # Run through all current rules + for rule in current_rules: + + # Verbose output + if self.verbosity >= 1: + print("\n{}:".format(rule)) + + # Exhaustively apply the rule to the graph... + orientations = self._apply_rule(rule, only_lagged) + + # Verbose output + if self.verbosity >= 1: + for ((i, j, lag_i), new_link) in set(orientations): + print("{:10} ({},{:2}) {:3} ({},{:2}) ==> ({},{:2}) {:3} ({},{:2}) ".format("Marked:", i, lag_i, self._get_link((i, lag_i), (j, 0)), j, 0,i, lag_i, new_link, j, 0)) + if len(orientations) == 0: + print("Found nothing") + + # ... 
and stage the results for orientation and removal + to_orient.extend(orientations) + + ########################################################################################################### + ### Aggregation of marked orientations #################################################################### + + links_to_remove = set() + links_to_fix = set() + new_ancs = {j: set() for j in range(self.N)} + new_non_ancs = {j: set() for j in range(self.N)} + + # Run through all of the nested dictionary + for ((i, j, lag_i), new_link) in to_orient: + + # The old link + old_link = self._get_link((i, lag_i), (j, 0)) + + # Is the link marked for removal? + if new_link == "" and len(old_link) > 0: + links_to_remove.add((i, j, lag_i)) + continue + + # Assert that no preceeding variable is marked as an ancestor of later variable + assert not (lag_i > 0 and new_link[2] == "-") + + # Is the link marked for fixation? + if new_link[1] == "-" and old_link[1] != "-": + links_to_fix.add((i, j, lag_i)) + + # New ancestral relation of (i, lag_i) to (j, 0) + if new_link[0] == "-" and old_link[0] != "-": + new_ancs[j].add((i, lag_i)) + elif new_link[0] == "<" and old_link[0] != "<": + new_non_ancs[j].add((i, lag_i)) + + # New ancestral relation of (j, 0) to (i, lag_i == 0) + if lag_i == 0: + if new_link[2] == "-" and old_link[2] != "-": + new_ancs[i].add((j, 0)) + elif new_link[2] == ">" and old_link[2] != ">": + new_non_ancs[i].add((j, 0)) + + # Resolve conflicts about removal and fixation + ambiguous_links = links_to_fix.intersection(links_to_remove) + links_to_fix = links_to_fix.difference(ambiguous_links) + links_to_remove = links_to_remove.difference(ambiguous_links) + + ########################################################################################################### + ### Removals, update middle marks, update ancestral information ########################################### + + # Remove links + for (i, j, lag_i) in links_to_remove: + self._write_link((i, lag_i), (j, 0), "", verbosity = self.verbosity) + restart = True + + # Fix links + for (i, j, lag_i) in links_to_fix: + old_link = self._get_link((i, lag_i), (j, 0)) + new_link = old_link[0] + "-" + old_link[2] + self._write_link((i, lag_i), (j, 0), new_link, verbosity = self.verbosity) + restart = True + + # Mark links as ambiguous + for (i, j, lag_i) in ambiguous_links: + old_link = self._get_link((i, lag_i), (j, 0)) + new_link = old_link[0] + "x" + old_link[2] + self._write_link((i, lag_i), (j, 0), new_link, verbosity = self.verbosity) + + # Update ancestral information. 
The function called includes conflict resolution + restart = restart or self._apply_new_ancestral_information(new_non_ancs, new_ancs) + + ########################################################################################################### + ### Make separating sets of removed links weakly minimal ################################################## + + if len(links_to_remove) > 0: + + # Verbose output + if self.verbosity >= 1: + print("\nLinks were removed by rules\n") + + new_ancs = {j: set() for j in range(self.N)} + new_non_ancs = {j: set() for j in range(self.N)} + + # Run through all links that have been removed + for (i, j, lag_i) in links_to_remove: + + X = (i, lag_i) + Y = (j, 0) + + # Get ancestors of X and Y + ancs_XY = self._get_ancs([X, Y]).difference({X, Y}) + + # Read out all separating sets that were found in the rule phase, then consider only those of minimal + # cardinality + old_sepsets_all = {Z for (Z, _) in self._get_sepsets(X, Y)} + min_size = min({len(Z) for Z in old_sepsets_all}) + old_sepsets_smallest = {Z for Z in old_sepsets_all if len(Z) == min_size} + + # For all separating sets of minimal cardinality, find weakly minimal separating subsets + self._delete_sepsets(X, Y) + self._make_sepset_weakly_minimal(X, Y, old_sepsets_smallest, ancs_XY) + new_sepsets = self._get_sepsets(X, Y) + + # end for (i, j, lag_i) in links_to_remove + # end if len(links_to_remove) > 0 + + # If any useful new information was found, go back to idx = 0, else increase idx by 1 + if restart: + idx = 0 + restarted_once = True + else: + idx = idx + 1 + + # end while idx <= len(rule_list) - 1 + + # Verbose output + if self.verbosity >= 1: + print("\nOrientation phase complete") + + # No return value + return restarted_once + + ######################################################################################################################## + ######################################################################################################################## + ######################################################################################################################## + + def _get_default_and_search_sets(self, A, B, phase): + """Return the default conditioning set and PC search set""" + + if phase == "ancestral": + + # This is a-pds-t(A, B) + S_raw = self._get_a_pds_t(A, B) + + # Determine the default conditioning set + S_default = self._get_parents(A, B).difference({A, B}) + + # Determine the PC search set + S_search = S_raw.difference(S_default) + + + elif phase == "non-ancestral": + + # This is na-pds-t(A, B) + S_raw = self._get_na_pds_t(A, B) + + self.max_na_pds_set_found = max(self.max_na_pds_set_found, len(S_raw)) + + # Determine the default conditioning set + S_default = S_raw.intersection(self._get_ancs([A, B])) + S_default = S_default.union(self._get_parents(A, B)) + S_default = S_default.difference({A, B}) + + # Determine the PC search set + S_search = S_raw.difference(S_default) + + # Return + return S_default, S_search + + + def _apply_new_ancestral_information(self, new_non_ancs, new_ancs): + """Apply the new ancestorships and non-ancestorships specified by new_non_ancs and new_ancs to the current graph. Conflicts + are resolved by marking. 
Returns True if any circle mark was turned into a head or tail, else False.""" + + ####################################################################################################### + ### Preprocessing ##################################################################################### + + # Memory variables + add_to_def_non_ancs = {j: set() for j in range(self.N)} + add_to_def_ancs = {j: set() for j in range(self.N)} + add_to_ambiguous_ancestorships = {j: set() for j in range(self.N)} + put_head_or_tail = False + + # Default values + if new_non_ancs is None: + new_non_ancs = {j: set() for j in range(self.N)} + + if new_ancs is None: + new_ancs = {j: set() for j in range(self.N)} + + # Marking A as ancestor of B implies that B is marked as a non-ancestor of A. This is only non-trivial for A before B + for j in range(self.N): + for (i, lag_i) in new_ancs[j]: + if lag_i == 0: + new_non_ancs[i].add((j, 0)) + + ####################################################################################################### + ### Conflict resolution ############################################################################### + + # Iterate through new_non_ancs + for j in range(self.N): + for (i, lag_i) in new_non_ancs[j]: + # X = (i, lag_i), Y = (j, 0) + # X is marked as non-ancestor for Y + + # Conflict resolution + if (i, lag_i) in self.ambiguous_ancestorships[j]: + # There is a conflict, since it is already marked as ambiguous whether X is an ancestor of Y + if self.verbosity >= 1: + print("{:10} ({}, {:2}) marked as non-anc of {} but saved as ambiguous".format("Conflict:", i, lag_i, (j, 0))) + + elif (i, lag_i) in self.def_ancs[j]: + # There is a conflict, since X is already marked as ancestor of Y + add_to_ambiguous_ancestorships[j].add((i, lag_i)) + + if self.verbosity >= 1: + print("{:10} ({}, {:2}) marked as non-anc of {} but saved as anc".format("Conflict:", i, lag_i, (j, 0))) + + elif (i, lag_i) in new_ancs[j]: + # There is a conflict, since X is also marked as a new ancestor of Y + add_to_ambiguous_ancestorships[j].add((i, lag_i)) + + if self.verbosity >= 1: + print("{:10} ({}, {:2}) marked as both anc- and non-anc of {}".format("Conflict:", i, lag_i, (j, 0))) + + else: + # There is no conflict + add_to_def_non_ancs[j].add((i, lag_i)) + + # Iterate through new_ancs + for j in range(self.N): + for (i, lag_i) in new_ancs[j]: + # X = (i, lag_i), Y = (j, 0) + # X is marked as ancestor for Y + + # Conflict resolution + if (i, lag_i) in self.ambiguous_ancestorships[j]: + # There is a conflict, since it is already marked as ambiguous whether X is an ancestor of Y + if self.verbosity >= 1: + print("{:10} ({}, {:2}) marked as anc of {} but saved as ambiguous".format("Conflict:", i, lag_i, (j, 0))) + + elif lag_i == 0 and (j, 0) in self.ambiguous_ancestorships[i]: + # There is a conflict, since X and Y are contemporaneous and it is already marked ambiguous as whether Y is an + # ancestor of X + # Note: This is required here, because X being an ancestor of Y implies that Y is not an ancestor of X. 
This + # ambiguity cannot exist when X is before Y + if self.verbosity >= 1: + print("{:10} ({}, {:2}) marked as anc of {} but saved as ambiguous".format("Conflict:", i, lag_i, (j, 0))) + + elif (i, lag_i) in self.def_non_ancs[j]: + # There is a conflict, since X is already marked as non-ancestor of Y + add_to_ambiguous_ancestorships[j].add((i, lag_i)) + + if self.verbosity >= 1: + print("{:10} ({}, {:2}) marked as anc of {} but saved as non-anc".format("Conflict:", i, lag_i, (j, 0))) + + elif (i, lag_i) in new_non_ancs[j]: + # There is a conflict, since X is also marked as a new non-ancestor of Y + add_to_ambiguous_ancestorships[j].add((i, lag_i)) + + if self.verbosity >= 1: + print("{:10} ({}, {:2}) marked as both anc- and non-anc of {}".format("Conflict:", i, lag_i, (j, 0))) + + else: + # There is no conflict + add_to_def_ancs[j].add((i, lag_i)) + + ####################################################################################################### + + ####################################################################################################### + ### Apply the ambiguous information ################################################################### + + for j in range(self.N): + + for (i, lag_i) in add_to_ambiguous_ancestorships[j]: + + old_link = self._get_link((i, lag_i), (j, 0)) + if len(old_link) > 0 and old_link[0] != "x": + + new_link = "x" + old_link[1] + old_link[2] + self._write_link((i, lag_i), (j, 0), new_link, verbosity = self.verbosity) + + if self.verbosity >= 1: + if (i, lag_i) in self.def_ancs[j]: + print("{:10} Removing ({}, {:2}) as anc of {}".format("Update:", i, lag_i, (j, 0))) + if (i, lag_i) in self.def_non_ancs[j]: + print("{:10} Removing ({}, {:2}) as non-anc of {}".format("Update:", i, lag_i, (j, 0))) + + self.def_ancs[j].discard((i, lag_i)) + self.def_non_ancs[j].discard((i, lag_i)) + + if lag_i == 0: + + if self.verbosity >= 1 and (j, 0) in self.def_ancs[i]: + print("{:10} Removing {} as anc of {}".format("Update:", i, lag_i, (j, 0))) + + self.def_ancs[i].discard((j, 0)) + # Do we also need the following? 
+ # self.def_non_ancs[i].discard((j, 0)) + + if self.verbosity >= 1 and (i, lag_i) not in self.ambiguous_ancestorships[j]: + print("{:10} Marking ancestorship of ({}, {:2}) to {} as ambiguous".format("Update:", i, lag_i, (j, 0))) + + self.ambiguous_ancestorships[j].add((i, lag_i)) + + ####################################################################################################### + ### Apply the unambiguous information ################################################################# + + for j in range(self.N): + + for (i, lag_i) in add_to_def_non_ancs[j]: + + old_link = self._get_link((i, lag_i), (j, 0)) + if len(old_link) > 0 and old_link[0] != "<": + new_link = "<" + old_link[1] + old_link[2] + self._write_link((i, lag_i), (j, 0), new_link, verbosity = self.verbosity) + put_head_or_tail = True + + if self.verbosity >= 1 and (i, lag_i) not in self.def_non_ancs[j]: + print("{:10} Marking ({}, {:2}) as non-anc of {}".format("Update:", i, lag_i, (j, 0))) + + self.def_non_ancs[j].add((i, lag_i)) + + + for (i, lag_i) in add_to_def_ancs[j]: + + old_link = self._get_link((i, lag_i), (j, 0)) + if len(old_link) > 0 and (old_link[0] != "-" or old_link[2] != ">"): + new_link = "-" + old_link[1] + ">" + self._write_link((i, lag_i), (j, 0), new_link, verbosity = self.verbosity) + put_head_or_tail = True + + if self.verbosity >= 1 and (i, lag_i) not in self.def_ancs[j]: + print("{:10} Marking ({}, {:2}) as anc of {}".format("Update:", i, lag_i, (j, 0))) + + self.def_ancs[j].add((i, lag_i)) + + if lag_i == 0: + + if self.verbosity >= 1 and (j, 0) not in self.def_non_ancs[i]: + print("{:10} Marking {} as non-anc of {}".format("Update:",(j, 0), (i, 0))) + + self.def_non_ancs[i].add((j, 0)) + + ####################################################################################################### + + return put_head_or_tail + + def _apply_rule(self, rule, only_lagged): + """Call the orientation-removal-rule specified by the string argument rule.""" + + if rule == "APR": + return self._apply_APR(only_lagged) + elif rule == "ER-00-a": + return self._apply_ER00a(only_lagged) + elif rule == "ER-00-b": + return self._apply_ER00b(only_lagged) + elif rule == "ER-00-c": + return self._apply_ER00c(only_lagged) + elif rule == "ER-00-d": + return self._apply_ER00d(only_lagged) + elif rule == "ER-01": + return self._apply_ER01(only_lagged) + elif rule == "ER-02": + return self._apply_ER02(only_lagged) + elif rule == "ER-03": + return self._apply_ER03(only_lagged) + elif rule == "R-04": + return self._apply_R04(only_lagged) + elif rule == "ER-08": + return self._apply_ER08(only_lagged) + elif rule == "ER-09": + return self._apply_ER09(only_lagged) + elif rule == "ER-10": + return self._apply_ER10(only_lagged) + + + def _get_na_pds_t(self, A, B): + """Return the set na_pds_t(A, B), with at least one of them at lag 0""" + + # Unpack A and B, then assert that at least one of them is at lag 0 + var_A, lag_A = A + var_B, lag_B = B + assert lag_A == 0 or lag_B == 0 + + # If na_pds_t(A, B) is in memory, return immediately + memo = self._na_pds_t[A].get(B) + if memo is not None: + return memo + + # Else, re-compute na_pds_t(A, B) it according to the current graph and cache it. + + # Re-compute na_pds_t_1(A, B) according to the current graph + na_pds_t_1 = {(var, lag + lag_A) + # W = (var, lag + lag_A) is in na_pds_t_1(A, B) if ... + for ((var, lag), link) in self.graph_dict[var_A].items() + # ... it is a non-future adjacency of A + if len(link) > 0 + # ... and is not B + and (var, lag + lag_A) != B + # ... 
and is not before t - tau_max + and (lag + lag_A) >= -self.tau_max + # ... and is not after both A and B + # ... (i.e. is not after time t) + and (lag + lag_A) <= 0 + # ... and is not a definite non-ancestor of A, + # which implies that it is not a definite descendant of A, + and link[0] != "<" + # ... and is not a definite descendant of B + # (i.e., B is not a definite ancestor of W) + and (var_B, lag_B - (lag + lag_A)) not in self.def_ancs[var] + } + + # Compute na_pds_t_2(A, B) + + # Find all potential C_1 nodes + C1_list = set() + for ((var, lag), link) in self.graph_full_dict[var_A].items(): + + node = (var, lag + lag_A) + + # node is added to C1_list if, in addition to being adjacent to A, ... + # ... it is not B + if (var, lag + lag_A) == B: + continue + + # ... it is not before t - tau_max + if (lag + lag_A) < -self.tau_max: + continue + + # ... it is not after B + if (lag + lag_A) > lag_B: + continue + + # ... it is not a definite ancestor of A + if link[0] == "-": + continue + + # ... it is not a definite descendant of A + if link[2] == "-": + continue + + # ... it is not a definite non-ancestor of B, + # which implies that it is not a definite descendant of B + if (var, (lag + lag_A) - lag_B) in self.def_non_ancs[var_B]: + continue + + # If all tests are passed, node is added to C1_list + C1_list.add(node) + + # end for ((var, lag), link) in self.graph_full_dict[var_A].items() + + # Breath first search to find (a superset of) na_pds_t_2(A, B) + + visited = set() + start_from = {(C1, A) for C1 in C1_list} + + while start_from: + + new_start_from = set() + new_do_not_visit = set() + + for (current_node, previous_node) in start_from: + + visited.add((current_node, previous_node)) + + for (var, lag) in self.graph_full_dict[current_node[0]]: + + next_node = (var, lag + current_node[1]) + + if next_node[1] < -self.tau_max: + continue + if next_node[1] > 0: + continue + if (next_node, current_node) in visited: + continue + if next_node == previous_node: + continue + if next_node == B: + continue + if next_node == A: + continue + + link_l = self._get_link(next_node, current_node) + link_r = self._get_link(previous_node, current_node) + + if link_l[2] == "-" or link_r[2] == "-": + continue + if self._get_link(next_node, previous_node) == "" and (link_l[2] == "o" or link_r[2] == "o"): + continue + if (var_A, lag_A - next_node[1]) in self.def_ancs[next_node[0]] or (var_B, lag_B - next_node[1]) in self.def_ancs[next_node[0]]: + continue + if ((next_node[1] - lag_A > 0) or (next_node[0], next_node[1] - lag_A) in self.def_non_ancs[var_A]) and ((next_node[1] - lag_B > 0) or (next_node[0], next_node[1] - lag_B) in self.def_non_ancs[var_B]): + continue + + new_start_from.add((next_node, current_node)) + + start_from = new_start_from + + # end while start_from + + na_pds_t_2 = {node for (node, _) in visited} + + self._na_pds_t[A][B] = na_pds_t_1.union(na_pds_t_2).difference({A, B}) + return self._na_pds_t[A][B] + + + def _make_sepset_weakly_minimal(self, X, Y, Z_list, ancs): + """ + X and Y are conditionally independent given Z in Z_list However, it is not yet clear whether any of these Z are minimal + separating set. + + This function finds weakly minimal separating subsets in an order independent way and writes them to the self.sepsets + dictionary. Only certainly weakly minimal separating subsets are retained. 
+ """ + + # Assert that all Z in Z_list have the same cardinality + assert len({len(Z) for Z in Z_list}) == 1 + + # Base Case 1: + # Z in Z_list is weakly minimal if len(Z) <= 1 or Z \subset ancs + any_weakly_minimal = False + + for Z in Z_list: + + if len(Z) <=1 or Z.issubset(ancs): + self._save_sepset(X, Y, (frozenset(Z), "wm")) + any_weakly_minimal = True + + if any_weakly_minimal: + return None + + # If not Base Case 1, we need to search for separating subsets. We do this for all Z in Z_list, and build a set sepsets_next_call + # that contains all separating sets for the next recursive call + sepsets_next_call = set() + + for Z in Z_list: + + # Find all nodes A in Z that are not in ancs + removable = Z.difference(ancs) + + # Test for removal of all nodes in removable + new_sepsets = [] + val_values = [] + + for A in removable: + + Z_A = [node for node in Z if node != A] + + # Run the conditional independence test + val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = Z_A, tau_max = self.tau_max) + + if self.verbosity >= 2: + print("MakeMin: %s _|_ %s | Z_A = %s: val = %.2f / pval = % .4f" % + (X, Y, ' '.join([str(z) for z in list(Z_A)]), val, pval)) + + # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic + # values and conditioning set cardinalities + self._update_pval_val_card_dicts(X, Y, pval, val, len(Z_A)) + + # Check whether the test result was significant + if pval > self.pc_alpha: + new_sepsets.append(frozenset(Z_A)) + val_values.append(val) + + # If new_sepsets is empty, then Z is already weakly minimal + if len(new_sepsets) == 0: + self._save_sepset(X, Y, (frozenset(Z), "wm")) + any_weakly_minimal = True + + # If we did not yet find a weakly minimal separating set + if not any_weakly_minimal: + + # Sort all separating sets in new_sepets by their test statistic, then append those separating sets with maximal statistic + # to sepsets_next_call. 
This i) guarantees order independence while ii) continuing to test as few as possible separating sets + new_sepsets = [node for _, node in sorted(zip(val_values, new_sepsets), reverse = True)] + + i = -1 + while i <= len(val_values) - 2 and val_values[i + 1] == val_values[0]: + sepsets_next_call.add(new_sepsets[i]) + i = i + 1 + + assert i >= 0 + + # If we did not yet find a weakly minimal separating set, make a recursive call + if not any_weakly_minimal: + self._make_sepset_weakly_minimal(X, Y, sepsets_next_call, ancs) + else: + return None + + + def _B_not_in_SepSet_AC(self, A, B, C): + """Is B in less than half of the sets in SepSets(A, C)?""" + + # Treat A - B - C as the same triple as C - B - A + # Convention: A is before C or, if they are contemporaneous, the index of A is smaller than that of C + if C[1] < A[1] or (C[1] == A[1] and C[0] < A[0]): + return self._B_not_in_SepSet_AC(C, B, A) + + # Remember all separating sets that we will find + all_sepsets = set() + + # Get the non-future adjacencies of A and C + if not self.use_a_pds_t_for_majority: + adj_A = self._get_non_future_adj([A]).difference({A, C}) + adj_C = self._get_non_future_adj([C]).difference({A, C}) + else: + adj_A = self._get_a_pds_t(A, C).difference({A, C}) + adj_C = self._get_a_pds_t(C, A).difference({A, C}) + + Z_add = self._get_parents(A, C).difference({A, C}) + + search_A = adj_A.difference(Z_add) + search_C = adj_C.difference(Z_add) + + if not np.isinf(self.max_q_global): + search_A = self._sort_search_set(search_A, A) + search_C = self._sort_search_set(search_C, C) + + # Test for independence given all subsets of non-future adjacencies of A + if A[1] < C[1]: + max_p_A = min([len(search_A), self.max_cond_px, self.max_p_global]) + 1 + else: + max_p_A = min([len(search_A), self.max_p_global]) + 1 + + # Shift lags + search_A = [(var, lag - C[1]) for (var, lag) in search_A] + search_C = [(var, lag - C[1]) for (var, lag) in search_C] + Z_add = {(var, lag - C[1]) for (var, lag) in Z_add} + X = (A[0], A[1] - C[1]) + Y = (C[0], 0) + + for p in range(max_p_A): + + q_count = 0 + for Z_raw in combinations(search_A, p): + + q_count = q_count + 1 + if q_count > self.max_q_global: + break + + # Prepare the conditioning set + Z = {node for node in Z_raw if node != X and node != Y} + Z = Z.union(Z_add) + + # Test conditional independence of X and Y given Z + val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max) + + if self.verbosity >= 2: + print("BnotinSepSetAC(A): %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" % + (X, Y, ' '.join([str(z) for z in Z_add]), ' '.join([str(z) for z in {node for node in Z_raw if node != X and node != Y}]), val, pval)) + + # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic + # values and conditioning set cardinalities + self._update_pval_val_card_dicts(X, Y, pval, val, len(Z)) + + # Check whether test result was significant + if pval > self.pc_alpha: + all_sepsets.add(frozenset(Z)) + + # Test for independence given all subsets of non-future adjacencies of C + for p in range(min(len(search_C), self.max_p_global) + 1): + + q_count = 0 + for Z_raw in combinations(search_C, p): + + q_count = q_count + 1 + if q_count > self.max_q_global: + break + + # Prepare the conditioning set + Z = {node for node in Z_raw if node != X and node != Y} + Z = Z.union(Z_add) + + # Test conditional independence of X and Y given Z + val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = 
self.tau_max) + + if self.verbosity >= 2: + # print("BnotinSepSetAC(C): %s _|_ %s | Z = %s: val = %.2f / pval = % .4f" % + # (X, Y, ' '.join([str(z) for z in list(Z)]), val, pval)) + print("BnotinSepSetAC(C): %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" % + (X, Y, ' '.join([str(z) for z in Z_add]), ' '.join([str(z) for z in {node for node in Z_raw if node != X and node != Y}]), val, pval)) + + # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic + # values and conditioning set cardinalities + self._update_pval_val_card_dicts(X, Y, pval, val, len(Z)) + + # Check whether test result was significant + if pval > self.pc_alpha: + all_sepsets.add(frozenset(Z)) + + # Append the already known sepset + all_sepsets = all_sepsets.union({Z for (Z, _) in self._get_sepsets(X, Y)}) + + # Count number of sepsets and number of sepsets that contain B + n_sepsets = len(all_sepsets) + n_sepsets_with_B = len([1 for Z in all_sepsets if (B[0], B[1] - C[1]) in Z]) + + return True if 2*n_sepsets_with_B < n_sepsets else False + + + def _B_in_SepSet_AC(self, A, B, C): + """Is B in more than half of the sets in SepSets(A, C)?""" + + # Treat A - B - C as the same triple as C - B - A + # Convention: A is before C or, if they are contemporaneous, the index of A is smaller than that of C + if C[1] < A[1] or (C[1] == A[1] and C[0] < A[0]): + return self._B_in_SepSet_AC(C, B, A) + + link_AB = self._get_link(A, B) + link_CB = self._get_link(C, B) + + if link_AB == "" or link_CB == "" or link_AB[1] != "-" or link_CB[1] != "-": + + # Vote is based on those sets that where found already + all_sepsets = {Z for (Z, _) in self._get_sepsets(A, C)} + + # Count number of sepsets and number of sepsets that contain B + n_sepsets = len(all_sepsets) + n_sepsets_with_B = len([1 for Z in all_sepsets if B in Z]) + + return True if 2*n_sepsets_with_B > n_sepsets else False + + else: + + # Remember all separating sets that we will find + all_sepsets = set() + + # Get the non-future adjacencies of A and C + if not self.use_a_pds_t_for_majority: + adj_A = self._get_non_future_adj([A]).difference({A, C}) + adj_C = self._get_non_future_adj([C]).difference({A, C}) + else: + adj_A = self._get_a_pds_t(A, C).difference({A, C}) + adj_C = self._get_a_pds_t(C, A).difference({A, C}) + + Z_add = self._get_parents(A, C).difference({A, C}) + + search_A = adj_A.difference(Z_add) + search_C = adj_C.difference(Z_add) + + if not np.isinf(self.max_q_global): + search_A = self._sort_search_set(search_A, A) + search_C = self._sort_search_set(search_C, C) + + # Test for independence given all subsets of non-future adjacencies of A + if A[1] < C[1]: + max_p_A = min([len(search_A), self.max_cond_px, self.max_p_global]) + 1 + else: + max_p_A = min([len(search_A), self.max_p_global]) + 1 + + # Shift lags + search_A = [(var, lag - C[1]) for (var, lag) in search_A] + search_C = [(var, lag - C[1]) for (var, lag) in search_C] + Z_add = {(var, lag - C[1]) for (var, lag) in Z_add} + X = (A[0], A[1] - C[1]) + Y = (C[0], 0) + + for p in range(max_p_A): + + q_count = 0 + for Z_raw in combinations(search_A, p): + + q_count = q_count + 1 + if q_count > self.max_q_global: + break + + # Prepare the conditioning set + Z = {node for node in Z_raw if node != X and node != Y} + Z = Z.union(Z_add) + + # Test conditional independence of X and Y given Z + val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max) + + if self.verbosity >= 2: + # print("BinSepSetAC(A): %s _|_ %s | Z = 
%s: val = %.2f / pval = % .4f" % + # (X, Y, ' '.join([str(z) for z in list(Z)]), val, pval)) + print("BinSepSetAC(A): %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" % + (X, Y, ' '.join([str(z) for z in Z_add]), ' '.join([str(z) for z in {node for node in Z_raw if node != X and node != Y}]), val, pval)) + + # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic + # values and conditioning set cardinalities + self._update_pval_val_card_dicts(X, Y, pval, val, len(Z)) + + # Check whether test result was significant + if pval > self.pc_alpha: + all_sepsets.add(frozenset(Z)) + + # Test for independence given all subsets of non-future adjacencies of C + for p in range(min(len(search_C), self.max_p_global) + 1): + + q_count = 0 + for Z_raw in combinations(search_C, p): + + q_count = q_count + 1 + if q_count > self.max_q_global: + break + + # Prepare the conditioning set + Z = {node for node in Z_raw if node != X and node != Y} + Z = Z.union(Z_add) + + # Test conditional independence of X and Y given Z + val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max) + + if self.verbosity >= 2: + # print("BinSepSetAC(C): %s _|_ %s | Z = %s: val = %.2f / pval = % .4f" % + # (X, Y, ' '.join([str(z) for z in list(Z)]), val, pval)) + print("BinSepSetAC(C): %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" % + (X, Y, ' '.join([str(z) for z in Z_add]), ' '.join([str(z) for z in {node for node in Z_raw if node != X and node != Y}]), val, pval)) + + # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic + # values and conditioning set cardinalities + self._update_pval_val_card_dicts(X, Y, pval, val, len(Z)) + + # Check whether test result was significant + if pval > self.pc_alpha: + all_sepsets.add(frozenset(Z)) + + # Append the already known sepset + all_sepsets = all_sepsets.union({Z for (Z, _) in self._get_sepsets(X, Y)}) + + # Count number of sepsets and number of sepsets that contain B + n_sepsets = len(all_sepsets) + n_sepsets_with_B = len([1 for Z in all_sepsets if (B[0], B[1] - C[1]) in Z]) + + return True if 2*n_sepsets_with_B > n_sepsets else False + + + def _get_parents(self, A, B): + """Return all known parents of all nodes in node_list""" + + if self.parents_of_lagged or A[1] == B[1]: + + out = {(var, lag + A[1]) for ((var, lag), link) in self.graph_dict[A[0]].items() if len(link) > 0 and link[0] == "-" and lag + A[1] >= -self.tau_max} + return out.union({(var, lag + B[1]) for ((var, lag), link) in self.graph_dict[B[0]].items() if len(link) > 0 and link[0] == "-" and lag + B[1] >= -self.tau_max}) + + else: + if A[1] < B[1]: + return {(var, lag + B[1]) for ((var, lag), link) in self.graph_dict[B[0]].items() if len(link) > 0 and link[0] == "-" and lag + B[1] >= -self.tau_max} + else: + return {(var, lag + A[1]) for ((var, lag), link) in self.graph_dict[A[0]].items() if len(link) > 0 and link[0] == "-" and lag + A[1] >= -self.tau_max} + + + def _apply_middle_mark(self, X, Y, char): + """Update the middle mark on the link between X and Y with the character char""" + + # Get the old link + old_link = self._get_link(X, Y) + + # Determine the new link + if old_link[1] == "?": + new_link = old_link[0] + char + old_link[2] + elif (old_link[1] == "L" and char == "R") or (old_link[1] == "R" and char == "L"): + new_link = old_link[0] + "!" 
+ old_link[2] + else: + assert False + + # Write the new link + self._write_link(X, Y, new_link, verbosity = self.verbosity) + + # Return + return True + + + def _update_middle_marks(self): + """Apply rule MMR""" + + if self.verbosity >= 1: + print("\nMiddle mark updates\n") + + # Run through all links + for j in range(self.N): + for ((i, lag_i), link) in self.graph_dict[j].items(): + + if link == "": + continue + + X = (i, lag_i) + Y = (j, 0) + + # Apply above rule for A = X and B = Y + link_XY = self._get_link(X, Y) + smaller_XY = self._is_smaller(X, Y) + + if link_XY[2] == ">": + + if link_XY[1] == "?": + if smaller_XY: + new_link = link_XY[0] + "L>" + else: + new_link = link_XY[0] + "R>" + + self._write_link(X, Y, new_link, verbosity = self.verbosity) + + elif (link_XY[1] == "R" and smaller_XY) or (link_XY[1] == "L" and not smaller_XY): + + new_link = link_XY[0] + "!>" + + self._write_link(X, Y, new_link, verbosity = self.verbosity) + + + # Apply above rule for A = Y and B = X + link_YX = self._get_link(Y, X) + smaller_YX = self._is_smaller(Y, X) + + if link_YX[2] == ">": + + if link_YX[1] == "?": + if smaller_YX: + new_link = link_YX[0] + "L>" + else: + new_link = link_YX[0] + "R>" + + self._write_link(Y, X, new_link, verbosity = self.verbosity) + + + elif (link_YX[1] == "R" and smaller_YX) or (link_YX[1] == "L" and not smaller_YX): + + new_link = link_YX[0] + "!>" + + self._write_link(Y, X, new_link, verbosity = self.verbosity) + + def _is_smaller(self, X, Y): + """ + A node X is said to be smaller than node Y if + i) X is before Y or + ii) X and Y are contemporaneous and the variable index of X is smaller than that of Y. + + Return True if X is smaller than Y, else return False + """ + + return (X[1] < Y [1]) or (X[1] == Y[1] and X[0] < Y[0]) + + + def _get_a_pds_t(self, A, B): + """Return the set a_pds_t(A, B)""" + + # Unpack A and assert that A is at lag 0 + var_A, lag_A = A + + # Compute a_pds_t(A, B) according to the current graph + return {(var, lag + lag_A) + # W = (var, lag) is in a_pds_t(A, B) if ... + for ((var, lag), link) in self.graph_dict[var_A].items() + # ... it is a non-future adjacency of A + if len(link) > 0 + # ... and it is not B + and (var, lag + lag_A) != B + # ... it is not before t - self.tau_max + and lag + lag_A >= -self.tau_max + # ... and it is not a definite non-ancestor of A + and link[0] != "<" + } + + + def _get_ancs(self, node_list): + """Return the currently known set of ancestors of all nodes in the list node_list. The nodes are not required to be at + lag 0""" + + # Build the output set + out = set() + + # Run through all nodes + for A in node_list: + # Unpack the node + (var_A, lag_A) = A + # Add the ancestors of node to out + out = out.union({(var, lag + lag_A) for (var, lag) in self.def_ancs[var_A] if lag + lag_A >= - self.tau_max}) + + # Return + return out + + + def _get_non_ancs(self, node_list): + """Return the currently known set of non-ancestors of all nodes in the list node_list. 
The nodes are not required to be + at lag 0""" + + # Build the output set + out = set() + + # Run through all nodes + for A in node_list: + # Unpack the node + (var_A, lag_A) = A + # Add the ancestors of node to out + out = out.union({(var, lag + lag_A) for (var, lag) in self.def_non_ancs[var_A] if lag + lag_A >= - self.tau_max}) + + # Return + return out + + + def _fix_all_edges(self): + """Remove all non-trivial orientations""" + + for j in range(self.N): + for (i, lag_i) in self.graph_dict[j].keys(): + + link = self._get_link((i, lag_i), (j, 0)) + if len(link) > 0: + new_link = link[0] + "-" + link[2] + self.graph_dict[j][(i, lag_i)] = new_link + + ######################################################################################################################## + ######################################################################################################################## + ######################################################################################################################## + + def _apply_APR(self, only_lagged): + """Return all orientations implied by orientation rule APR""" + + # Build the output list + out = [] + + if self.no_apr > 0: + return out + + # Get and run through all relevant graphical structures + for j in range(self.N): + for (i, lag_i) in self.graph_dict[j]: + + A = (i, lag_i) + B = (j, 0) + + if only_lagged and lag_i == 0: + continue + + # Get the link from A to B + link_AB = self._get_link(A, B) + + if self._match_link(pattern='-!>', link=link_AB) \ + or (self._match_link(pattern='-R>', link=link_AB) and self._is_smaller(A, B)) \ + or (self._match_link(pattern='-L>', link=link_AB) and self._is_smaller(B, A)): + + # Write the new link from A to B to the output list + out.append(self._get_pair_key_and_new_link(A, B, "-->")) + + # Return the output list + return out + + def _apply_ER01(self, only_lagged): + """Return all orientations implied by orientation rule R1^prime""" + + # Build the output list + out = [] + + # Find all graphical structures that the rule applies to + all_appropriate_triples = self._find_triples(pattern_ij='**>', pattern_jk='o*+', pattern_ik='') + + # Run through all appropriate graphical structures + for (A, B, C) in all_appropriate_triples: + + if only_lagged and B[1] == C[1]: + continue + + if self.verbosity >= 2: + print("ER01: ", (A, B, C)) + + # Check whether the rule applies + if self._B_in_SepSet_AC(A, B, C): + + if self.verbosity >= 2: + print(" --> in sepset ") + + # Prepare the new link from B to C and append it to the output list + link_BC = self._get_link(B, C) + new_link_BC = "-" + link_BC[1] + ">" + out.append(self._get_pair_key_and_new_link(B, C, new_link_BC)) + + # Return the output list + return out + + def _apply_ER02(self, only_lagged): + """Return all orientations implied by orientation rule R2^prime""" + + # Build the output list + out = [] + + # Find all graphical structures that the rule applies to + all_appropriate_triples = set(self._find_triples(pattern_ij='-*>', pattern_jk='**>', pattern_ik='+*o')) + all_appropriate_triples = all_appropriate_triples.union(set(self._find_triples(pattern_ij='**>', pattern_jk='-*>', pattern_ik='+*o'))) + + # Run through all appropriate graphical structures + for (A, B, C) in all_appropriate_triples: + + if only_lagged and A[1] == C[1]: + continue + + # The rule applies to all relevant graphical structures. 
Therefore, prepare the new link and append it to the output list + link_AC = self._get_link(A, C) + new_link_AC = link_AC[0] + link_AC[1] + ">" + out.append(self._get_pair_key_and_new_link(A, C, new_link_AC)) + + # print("Rule 2", A, self._get_link(A, B), B, self._get_link(B, C), C, self._get_link(A, C), new_link_AC) + + # Return the output list + return out + + + def _apply_ER03(self, only_lagged): + """Return all orientations implied by orientation rule R3^prime""" + + # Build the output list + out = [] + + # Find all graphical structures that the rule applies to + all_appropriate_quadruples = self._find_quadruples(pattern_ij='**>', pattern_jk='<**', pattern_ik='', + pattern_il='+*o', pattern_jl='o*+', pattern_kl='+*o') + + # Run through all appropriate graphical structures + for (A, B, C, D) in all_appropriate_quadruples: + + if only_lagged and B[1] == D[1]: + continue + + # Check whether the rule applies + if self._B_in_SepSet_AC(A, D, C): + + # Prepare the new link from D to B and append it to the output list + link_DB = self._get_link(D, B) + new_link_DB = link_DB[0] + link_DB[1] + ">" + out.append(self._get_pair_key_and_new_link(D, B, new_link_DB)) + + # Return the output list + return out + + + def _apply_R04(self, only_lagged): + """Return all orientations implied by orientation rule R4 (standard FCI rule)""" + + # Build the output list + out = [] + + # Find all relevant triangles W-V-Y + all_appropriate_triples = self._find_triples(pattern_ij='<-*', pattern_jk='o-+', pattern_ik='-->') + + # Run through all of these triangles + for triple in all_appropriate_triples: + + (W, V, Y) = triple + + if only_lagged and (V[1] == Y[1] and W[1] == V[1]): + continue + + # Get the current link from W to V, which we will need below + link_WV = self._get_link(W, V) + + # Find all discriminating paths for this triangle + # Note: To guarantee order independence, we check all discriminating paths. Alternatively, we could check the rule for all + # shortest such paths + discriminating_paths = self._get_R4_discriminating_paths(triple, max_length = np.inf) + + # Run through all discriminating paths + for path in discriminating_paths: + + # Get the end point node + X_1 = path[-1] + + # Check which of the two cases of the rule we are in, then append the appropriate new links to the output list + if self._B_in_SepSet_AC(X_1, V, Y): + # New link from V to Y + out.append(self._get_pair_key_and_new_link(V, Y, "-->")) + + elif link_WV != "<-x" and self._B_not_in_SepSet_AC(X_1, V, Y): + # New link from V to Y + out.append(self._get_pair_key_and_new_link(V, Y, "<->")) + + # If needed, also the new link from W to V + if link_WV != "<->": + out.append(self._get_pair_key_and_new_link(W, V, "<->")) + + # Return the output list + return out + + + def _apply_ER08(self, only_lagged): + """Return all orientations implied by orientation rule R8^prime""" + + # Build the output list + out = [] + + # Find all graphical structures that the rule applies to + all_appropriate_triples = self._find_triples(pattern_ij='-*>', pattern_jk='-*>', pattern_ik='o*+') + + # Run through all appropriate graphical structures + for (A, B, C) in all_appropriate_triples: + + if only_lagged and A[1] == C[1]: + continue + + # The rule applies to all relevant graphical structures. 
Therefore, prepare the new link and append it to the output list + link_AC = self._get_link(A, C) + new_link_AC = "-" + link_AC[1] + ">" + out.append(self._get_pair_key_and_new_link(A, C, new_link_AC)) + + #print("Rule 8:", A, self._get_link(A, B), B, self._get_link(B, C), C, link_AC, new_link_AC) + + # Return the output list + return out + + + def _apply_ER09(self, only_lagged): + """Return all orientations implied by orientation rule R9^prime""" + + # Build the output list + out = [] + + # Find unshielded triples B_1 o--*--o A o--*--> C or B_1 <--*--o A o--*--> C or B_1 <--*-- A o--*--> C + all_appropriate_triples = set(self._find_triples(pattern_ij='o*o', pattern_jk='o*>', pattern_ik='')) + all_appropriate_triples = all_appropriate_triples.union(set(self._find_triples(pattern_ij='<*o', pattern_jk='o*>', pattern_ik=''))) + all_appropriate_triples = all_appropriate_triples.union(set(self._find_triples(pattern_ij='<*-', pattern_jk='o*>', pattern_ik=''))) + + # Run through all these triples + for (B_1, A, C) in all_appropriate_triples: + + if only_lagged and A[1] == C[1]: + continue + + # Check whether A is in SepSet(B_1, C), else the rule does not apply + if not self._B_in_SepSet_AC(B_1, A, C): + continue + + # Although we do not yet know whether the rule applies, we here determine the new form of the link from A to C if the rule + # does apply + link_AC = self._get_link(A, C) + new_link_AC = "-" + link_AC[1] + ">" + pair_key, new_link = self._get_pair_key_and_new_link(A, C, new_link_AC) + + # For the search of uncovered potentially directed paths from B_1 to C, determine the initial pattern as dictated by the link + # from A to B_1 + first_link = self._get_link(A, B_1) + if self._match_link(pattern='o*o', link=first_link): + initial_allowed_patterns = ['-*>', 'o*>', 'o*o'] + elif self._match_link(pattern='o*>', link=first_link) or self._match_link(pattern='-*>', link=first_link): + initial_allowed_patterns = ['-*>'] + + # Return all uncovered potentially directed paths from B_1 to C + #uncovered_pd_paths = self._find_potentially_directed_paths(B_1, C, initial_allowed_patterns, return_if_any_path_found = False, + # uncovered=True, reduce_allowed_patterns=True, max_length = np.inf) + + # Find all uncovered potentially directed paths from B_1 to C + uncovered_pd_paths = self._get_potentially_directed_uncovered_paths(B_1, C, initial_allowed_patterns) + + # Run through all of these paths and check i) whether the node adjacent to B_1 is non-adjacent to A, ii) whether condition iv) of + # the rule antecedent is true. If there is any such path, then the link can be oriented + for upd_path in uncovered_pd_paths: + + # Is the node adjacent to B_1 non-adjacent to A (this implies that there are at least three nodes on the path, because else the + # node adjacent to B_1 is C) and is A not part of the path? 
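The path checks that follow share a single qualification loop over consecutive triples on an uncovered potentially directed path. A hedged, self-contained sketch of that loop is given here; get_link and B_in_SepSet_AC stand in for self._get_link and self._B_in_SepSet_AC and are not part of this patch.

    def path_qualifies(path, get_link, B_in_SepSet_AC):
        # The path qualifies as soon as one left-hand link points into the middle
        # node of a triple; otherwise every middle node must lie in the separating
        # set of its two neighbours.
        for i in range(len(path) - 2):
            left_link = get_link(path[i], path[i + 1])
            if left_link[2] == ">":
                return True
            if not B_in_SepSet_AC(path[i], path[i + 1], path[i + 2]):
                return False
        return True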
+ if len(upd_path) < 3 or A in upd_path or self._get_link(A, upd_path[1]) != "": + continue + + # If the link from A to B_1 is into B_1, condition iv) is true + if first_link[2] == ">": + # Mark the link from A to C for orientation, break the for loop to continue with the next triple + out.append((pair_key, new_link)) + break + + # If the link from A to B_1 is not in B_1, we need to check whether B_1 is in SepSet(A, X) where X is the node on upd_path next + # to B_1 + if not self._B_in_SepSet_AC(A, B_1, upd_path[1]): + # Continue with the next upd_path + continue + + # Now check whether rule iv) for all triples on upd_path + path_qualifies = True + for i in range(len(upd_path) - 2): + # We consider the unshielded triples upd_path[i] - upd_path[i+1] - upd_path[i+2] + + # If the link between upd_path[i] and upd_path[i+1] is into the latter, condition iv) is true + left_link = self._get_link(upd_path[i], upd_path[i+1]) + if left_link[2] == ">": + # The path qualifies, break the inner for loop + break + + # If not, then we need to continue with checking whether upd_path[i+1] in SepSet(upd_path[i+1], upd_path[i+2]) + if not self._B_in_SepSet_AC(upd_path[i], upd_path[i+1], upd_path[i+2]): + # The path does not qualifying, break the inner for loop + path_qualifies = False + break + + # The path qualifies, mark the edge from A to C for orientation and break the outer for loop to continue with the next triple + if path_qualifies: + out.append((pair_key, new_link)) + break + + # The path does not qualify, continue with the next upd_path + + # end for upd_path in uncovered_pd_paths + # end for (B_1, A, C) in all_appropriate_triples + + # Return the output list + return out + + + def _apply_ER10(self, only_lagged): + """Return all orientations implied by orientation rule R10^prime""" + + # Build the output list + out = [] + + # Find all triples A o--> C <-- P_C + all_appropriate_triples = set(self._find_triples(pattern_ij='o*>', pattern_jk='<*-', pattern_ik='')) + all_appropriate_triples = all_appropriate_triples.union(set(self._find_triples(pattern_ij='o*>', pattern_jk='<*-', pattern_ik='***'))) + + # Collect all triples for the given pair (A, C) + triple_sorting_dict = {} + for (A, C, P_C) in all_appropriate_triples: + if triple_sorting_dict.get((A, C)) is None: + triple_sorting_dict[(A, C)] = [P_C] + else: + triple_sorting_dict[(A, C)].append(P_C) + + + # Run through all (A, C) pairs + for (A, C) in triple_sorting_dict.keys(): + + if only_lagged and A[1] == C[1]: + continue + + # Find all uncovered potentially directed paths from A to C through any of the P_C nodes + relevant_paths = [] + for P_C in triple_sorting_dict[(A, C)]: + for upd_path in self._get_potentially_directed_uncovered_paths(A, P_C, ['-*>', 'o*>', 'o*o']): + + # Run through all of these paths and check i) whether the second to last element is not adjacent to C (this requires it to + # have a least three nodes, because else the second to last element would be A) and ii) whether the left edge of any 3-node + # sub-path is into the middle nor or, if not, whether the middle node is in the separating set of the two end-point nodes + # (of the 3-node) sub-path and iii) whether C is not element of the path. 
If path meets these conditions, add its second node + # (the adjacent to A) to the set second_nodes + + if len(upd_path) < 3 or C in upd_path or self._get_link(upd_path[-2], C) != "": + continue + + upd_path.append(C) + + path_qualifies = True + for i in range(len(upd_path) - 2): + # We consider the unshielded triples upd_path[i] - upd_path[i+1] - upd_path[i+2] + + # If the link between upd_path[i] and upd_path[i+1] is into the latter, the path qualifies + left_link = self._get_link(upd_path[i], upd_path[i+1]) + if left_link[2] == ">": + # The path qualifies, break the inner for loop + break + + # If not, then we need to continue with checking whether upd_path[i+1] in SepSet(upd_path[i+1], upd_path[i+2]) + if not self._B_in_SepSet_AC(upd_path[i], upd_path[i+1], upd_path[i+2]): + # The path does not qualify, break the inner for loop + path_qualifies = False + break + + # The path qualifies, add upd_path[i] to second_nodes and continue with the next upd_path + if path_qualifies: + relevant_paths.append(upd_path) + + # The path does not qualify, continue with the next upd_path + + # end for path in self._get_potentially_directed_uncovered_paths(A, P_C, ['-*>', 'o*>', 'o*o']) + # end for P_C in triple_sorting_dict[(A, C)] + + # Find all second nodes on the relevant paths + second_nodes = list({path[1] for path in relevant_paths}) + + # Check whether there is any pair of non-adjacent nodes in second_nodes, such that A is in their separating set. If yes, mark the link + # from A to C for orientation + for i, j in product(range(len(second_nodes)), range(len(second_nodes))): + + if i < j and self._get_link(second_nodes[i], second_nodes[j]) == "" and self._B_in_SepSet_AC(second_nodes[i], A, second_nodes[j]): + # Append new link and break the for loop + link_AC = self._get_link(A, C) + new_link_AC = "-" + link_AC[1] + ">" + out.append(self._get_pair_key_and_new_link(A, C, new_link_AC)) + break + + # end for (A, C) in triple_sorting_dict.keys() + + # Return the output list + return out + + + def _apply_ER00a(self, only_lagged): + """Return all orientations implied by orientation rule R0^prime a""" + + # Build the output list + out = [] + + # Find all graphical structures that the rule applies to + all_appropriate_triples = self._find_triples(pattern_ij='***', pattern_jk='***', pattern_ik='') + + # Run through all appropriate graphical structures + for (A, B, C) in all_appropriate_triples: + + # Unpack A, B, C + (i, lag_i) = A + (j, lag_j) = B + (k, lag_k) = C + + if only_lagged and (A[1] == B[1] or B[1] == C[1]): + continue + + # Get all weakly minimal separating sets in SepSet(A, C) + # Remark: The non weakly minimal separating sets may be larger, that's why we disfavor them + sepsets = self._get_sepsets(A, C) + sepsets = {Z for (Z, status) in sepsets if status == "wm"} + + ################################################################################### + ### Part 1) of the rule ########################################################### + + remove_AB = False + link_AB = self._get_link(A, B) + + # i) Middle mark must not be "x" or "-" + if link_AB[1] not in ['-', 'x']: + # Test A indep B given union(SepSet(A, C), intersection(def-anc(B), adj(B))) setminus{A, B} setminus{future of both A and B} + + # Conditioning on parents + Z_add = self._get_parents(A, B).difference({A, B}) + + # Shift the lags appropriately + if lag_i <= lag_j: + X = (i, lag_i - lag_j) # A shifted + Y = (j, 0) # B shifted + delta_lag = lag_j + + else: + X = (j, lag_j - lag_i) # B shifted + Y = (i, 0) # A shifted + delta_lag = 
lag_i + + # Run through all weakly minimal separating sets of A and C + for Z in sepsets: + + # Construct the conditioning set to test + Z_test = Z.union(Z_add).difference({A, B}) + Z_test = {(var, lag - delta_lag) for (var, lag) in Z_test if lag - delta_lag <= 0 and lag - delta_lag >= -self.tau_max} + Z_add2 = {(var, lag - delta_lag) for (var, lag) in Z_add.difference({A, B}) if lag - delta_lag <= 0 and lag - delta_lag >= -self.tau_max} + + # Test conditional independence of X and Y given Z + val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z_test), tau_max = self.tau_max) + + if self.verbosity >= 2: + # print("ER00a(part1): %s _|_ %s | Z_test = %s: val = %.2f / pval = % .4f" % + # (X, Y, ' '.join([str(z) for z in list(Z_test)]), val, pval)) + print("ER00a(part1): %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" % + (X, Y, ' '.join([str(z) for z in Z_add2]), ' '.join([str(z) for z in Z_test]), val, pval)) + + # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic values and + # conditioning set cardinalities + self._update_pval_val_card_dicts(X, Y, pval, val, len(Z_test)) + + # Check whether test result was significant + if pval > self.pc_alpha: + + # Mark the edge from X to Y for removal and save sepset + remove_AB = True + self._save_sepset(X, Y, (frozenset(Z_test), "nwm")) + + if remove_AB: + + # Remember the edge for removal + pair_key, new_link = self._get_pair_key_and_new_link(A, B, "") + out.append((pair_key, new_link)) + + ################################################################################### + ### Part 2) of the rule ########################################################### + + remove_CB = False + link_CB = self._get_link(C, B) + + # i) Middle mark must not be "x" or "-" + if link_CB[1] not in ['-', 'x']: + # Test C indep B given union(SepSet(A, C), intersection(def-anc(B), adj(B))) setminus{A, B} setminus{future of both C and B} + + # Conditioning on parents + Z_add = self._get_parents(C, B).difference({C, B}) + + # Shift the lags appropriately + if lag_k <= lag_j: + X = (k, lag_k - lag_j) + Y = (j, 0) + delta_lag = lag_j + else: + X = (j, lag_j - lag_k) + Y = (k, 0) + delta_lag = lag_k + + # Run through all weakly minimal separating sets of A and C + for Z in sepsets: + + # Construct the conditioning set to test + Z_test = Z.union(Z_add).difference({C, B}) + Z_test = {(var, lag - delta_lag) for (var, lag) in Z_test if lag - delta_lag <= 0 and lag - delta_lag >= -self.tau_max} + Z_add2 = {(var, lag - delta_lag) for (var, lag) in Z_add.difference({A, B}) if lag - delta_lag <= 0 and lag - delta_lag >= -self.tau_max} + + # Test conditional independence of X and Y given Z + val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z_test), tau_max = self.tau_max) + + if self.verbosity >= 2: + # print("ER00a(part2): %s _|_ %s | Z_test = %s: val = %.2f / pval = % .4f" % + # (X, Y, ' '.join([str(z) for z in list(Z_test)]), val, pval)) + print("ER00a(part2): %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" % + (X, Y, ' '.join([str(z) for z in Z_add2]), ' '.join([str(z) for z in Z_test]), val, pval)) + + # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic values and + # conditioning set cardinalities + self._update_pval_val_card_dicts(X, Y, pval, val, len(Z_test)) + + # Check whether test result was significant + if pval > self.pc_alpha: + + # Mark the edge from X to Y for removal and save sepset + remove_CB = True 
+ self._save_sepset(X, Y, (frozenset(Z_test), "nwm")) + + if remove_CB: + + # Remember the edge for removal + pair_key, new_link = self._get_pair_key_and_new_link(C, B, "") + out.append((pair_key, new_link)) + + ################################################################################### + ### Part 3) of the rule ########################################################### + + if remove_AB or remove_CB or link_AB[2] in ["-", "x"] or link_CB[2] in ["-", "x"] or link_AB[1] == "x" or link_CB[1] == "x" or (link_AB[2] == ">" and link_CB[2] == ">"): + continue + + if self._B_not_in_SepSet_AC(A, B, C): + + # Prepare the new links and save them to the output + if link_AB[2] != ">": + new_link_AB = link_AB[0] + link_AB[1] + ">" + out.append(self._get_pair_key_and_new_link(A, B, new_link_AB)) + + new_link_CB = link_CB[0] + link_CB[1] + ">" + if link_CB[2] != ">": + out.append(self._get_pair_key_and_new_link(C, B, new_link_CB)) + + # end for (A, B, C) in all_appropriate_triples + + # Return the output list + return out + + + def _apply_ER00b(self, only_lagged): + """Return all orientations implied by orientation rule R0^prime b""" + + # Build the output list + out = [] + + # Find all graphical structures that the rule applies to + triples_1 = self._find_triples(pattern_ij='**>', pattern_jk='o!+', pattern_ik='') + triples_2 = [trip for trip in self._find_triples(pattern_ij='**>', pattern_jk='oR+', pattern_ik='') if self._is_smaller(trip[1], trip[2])] + triples_3 = [trip for trip in self._find_triples(pattern_ij='**>', pattern_jk='oL+', pattern_ik='') if self._is_smaller(trip[2], trip[1])] + all_appropriate_triples = set(triples_1).union(set(triples_2), set(triples_3)) + + # Run through all appropriate graphical structures + for (A, B, C) in all_appropriate_triples: + + # Unpack A, B, C + (i, lag_i) = A + (j, lag_j) = B + (k, lag_k) = C + + if only_lagged and A[1] == B[1]: + continue + + # Get all weakly minimal separating sets in SepSet(A, C) + # Remark: The non weakly minimal separating sets may be larger, that's why we disfavor them + sepsets = self._get_sepsets(A, C) + sepsets = {Z for (Z, status) in sepsets if status == "wm"} + + ################################################################################### + ### Part 1) of the rule ########################################################### + + remove_AB = False + link_AB = self._get_link(A, B) + + # i) Middle mark must not be "x" or "-" + if link_AB[1] not in ['-', 'x']: + # Test A indep B given union(SepSet(A, C), intersection(def-anc(B), adj(B))) setminus{A, B} setminus{future of both A and B} + + # Conditioning on parents + Z_add = self._get_parents(A, B).difference({A, B}) + + # Shift the lags appropriately + if lag_i <= lag_j: + X = (i, lag_i - lag_j) + Y = (j, 0) + delta_lag = lag_j + else: + X = (j, lag_j - lag_i) + Y = (i, 0) + delta_lag = lag_i + + # Run through all weakly minimal separating sets of A and C + for Z in sepsets: + + # Construct the conditioning set to test + Z_test = Z.union(Z_add).difference({A, B}) + Z_test = {(var, lag - delta_lag) for (var, lag) in Z_test if lag - delta_lag <= 0 and lag - delta_lag >= -self.tau_max} + Z_add2 = {(var, lag - delta_lag) for (var, lag) in Z_add.difference({A, B}) if lag - delta_lag <= 0 and lag - delta_lag >= -self.tau_max} + + # Test conditional independence of X and Y given Z + val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z_test), tau_max = self.tau_max) + + if self.verbosity >= 2: + # print("ER00b: %s _|_ %s | Z_test = %s: val = %.2f / pval = % .4f" % + 
# (X, Y, ' '.join([str(z) for z in list(Z_test)]), val, pval)) + print("ER00b: %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" % + (X, Y, ' '.join([str(z) for z in Z_add2]), ' '.join([str(z) for z in Z_test]), val, pval)) + + # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic values and + # conditioning set cardinalities + self._update_pval_val_card_dicts(X, Y, pval, val, len(Z_test)) + + # Check whether test result was significant + if pval > self.pc_alpha: + + # Mark the edge from X to Y for removal and save sepset + remove_AB = True + self._save_sepset(X, Y, (frozenset(Z_test), "nwm")) + + if remove_AB: + # Remember the edge for removal + pair_key, new_link = self._get_pair_key_and_new_link(A, B, "") + out.append((pair_key, new_link)) + + ################################################################################### + ### Part 2) of the rule ########################################################### + + if only_lagged and B[1] == C[1]: + continue + + if remove_AB or link_AB[1] == "x": + continue + + if self._B_not_in_SepSet_AC(A, B, C): + + # Prepare the new link and save it to the output + link_CB = self._get_link(C, B) + new_link_CB = link_CB[0] + link_CB[1] + ">" + out.append(self._get_pair_key_and_new_link(C, B, new_link_CB)) + + # end for (A, B, C) in all_appropriate_triples + + # Return the output list + return out + + + def _apply_ER00c(self, only_lagged): + """Return all orientations implied by orientation rule R0^prime c""" + + # Build the output list + out = [] + + # Find all graphical structures that the rule applies to + triples_1 = self._find_triples(pattern_ij='*-*', pattern_jk='o!+', pattern_ik='') + triples_2 = [trip for trip in self._find_triples(pattern_ij='*-*', pattern_jk='oR+', pattern_ik='') if self._is_smaller(trip[1], trip[2])] + triples_3 = [trip for trip in self._find_triples(pattern_ij='*-*', pattern_jk='oL+', pattern_ik='') + if self._is_smaller(trip[2], trip[1])] + all_appropriate_triples = set(triples_1).union(set(triples_2), set(triples_3)) + + # Run through all appropriate graphical structures + for (A, B, C) in all_appropriate_triples: + + if only_lagged and B[1] == C[1]: + continue + + # Check whether the rule applies + if self._B_not_in_SepSet_AC(A, B, C): + + # Prepare the new link and append it to the output + link_CB = self._get_link(C, B) + new_link_CB = link_CB[0] + link_CB[1] + ">" + out.append(self._get_pair_key_and_new_link(C, B, new_link_CB)) + + # end for (A, B, C) in all_appropriate_triples + + # Return the output list + return out + + + def _apply_ER00d(self, only_lagged): + """Return all orientations implied by orientation rule R0^prime d""" + + # Build the output list + out = [] + + # Find all graphical structures that the rule applies to + triples_1 = self._find_triples(pattern_ij='*-o', pattern_jk='o-*', pattern_ik='') + triples_2 = self._find_triples(pattern_ij='*->', pattern_jk='o-*', pattern_ik='') + all_appropriate_triples = set(triples_1).union(set(triples_2)) + + # Run through all appropriate graphical structures + for (A, B, C) in all_appropriate_triples: + + if only_lagged and (A[1] == B[1] and B[1] == C[1]): + continue + + # Check whether the rule applies + if self._B_not_in_SepSet_AC(A, B, C): + # Prepare the new links and append them to the output + + # From C to B + if not only_lagged or B[1] != C[1]: + link_CB = self._get_link(C, B) + new_link_CB = link_CB[0] + link_CB[1] + ">" + out.append(self._get_pair_key_and_new_link(C, B, new_link_CB)) + + # 
If needed, also fromA to B + link_AB = self._get_link(A, B) + if (not only_lagged or A[1] != B[1]) and link_AB[2] == "o": + new_link_AB = link_AB[0] + link_AB[1] + ">" + out.append(self._get_pair_key_and_new_link(A, B, new_link_AB)) + + # end for (A, B, C) in all_appropriate_triples + + # Return the output list + return out + + ######################################################################################################################## + ######################################################################################################################## + ######################################################################################################################## + + def _print_graph_dict(self): + """Print all links in graph_dict""" + + for j in range(self.N): + for ((i, lag_i), link) in self.graph_dict[j].items(): + if len(link) > 0 and (lag_i < 0 or i < j): + print("({},{:2}) {} {}".format(i, lag_i, link, (j, 0))) + + + def _get_link(self, A, B): + """Get the current link from node A to B""" + + (var_A, lag_A) = A + (var_B, lag_B) = B + + if abs(lag_A - lag_B) > self.tau_max: + return "" + elif lag_A <= lag_B: + return self.graph_dict[var_B][(var_A, lag_A - lag_B)] + else: + return self._reverse_link(self.graph_dict[var_A][(var_B, lag_B - lag_A)]) + + + def _get_non_future_adj(self, node_list): + """Return all non-future adjacencies of all nodes in node_list""" + + # Build the output starting from an empty set + out = set() + + # For each node W in node_list ... + for A in node_list: + # Unpack A + (var_A, lag_A) = A + # Add all (current) non-future adjacencies of A to the set out + out = out.union({(var, lag + lag_A) for ((var, lag), link) in self.graph_dict[var_A].items() if len(link) > 0 and lag + lag_A >= -self.tau_max}) + + # Return the desired set + return out + + def _update_pval_val_card_dicts(self, X, Y, pval, val, card): + """If 'pval' is larger than the current maximal p-value across all previous independence tests for X and Y (stored in self.pval_max) + then: Replace the current values stored in self.pval_max, self.pval_max_val, self.pval_max_card respectively by 'pval', 'val', and 'card'.""" + + if X[1] < 0 or X[0] < Y[0]: + if pval > self.pval_max[Y[0]][X]: + self.pval_max[Y[0]][X] = pval + self.pval_max_val[Y[0]][X] = val + self.pval_max_card[Y[0]][X] = card + else: + if pval > self.pval_max[X[0]][Y]: + self.pval_max[X[0]][Y] = pval + self.pval_max_val[X[0]][Y] = val + self.pval_max_card[X[0]][Y] = card + + def _save_sepset(self, X, Y, Z): + """Save Z as separating sets of X and Y. Y is assumed to be at lag 0""" + + # Unpack X and Y + (i, lag_i) = X + (j, lag_j) = Y + + assert lag_j == 0 + + # Save the sepset + if lag_i < 0 or i < j: + self.sepsets[j][X].add(Z) + else: + self.sepsets[i][Y].add(Z) + + def _reverse_link(self, link): + """Reverse a given link, taking care to replace > with < and vice versa""" + + if link == "": + return "" + + if link[2] == ">": + left_mark = "<" + else: + left_mark = link[2] + + if link[0] == "<": + right_mark = ">" + else: + right_mark = link[0] + + return left_mark + link[1] + right_mark + + + def _write_link(self, A, B, new_link, verbosity = 0): + """Write the information that the link from node A to node B takes the form of new_link into self.graph_dict. Neither is it assumed + that at least of the nodes is at lag 0, nor must A be before B. 
If A and B are contemporaneous, also the link from B to A is written + as the reverse of new_link""" + + # Unpack A and B + (var_A, lag_A) = A + (var_B, lag_B) = B + + # Write the link from A to B + if lag_A < lag_B: + + if verbosity >= 1: + print("{:10} ({},{:2}) {:3} ({},{:2}) ==> ({},{:2}) {:3} ({},{:2}) ".format("Writing:", var_A, lag_A - lag_B, self.graph_dict[var_B][(var_A, lag_A - lag_B)], var_B, 0, var_A, lag_A - lag_B, new_link, var_B, 0)) + #print("Replacing {:3} from ({},{:2}) to {} with {:3}".format(self.graph_dict[var_B][(var_A, lag_A - lag_B)], var_A, lag_A - lag_B, (var_B, 0), new_link)) + + self.graph_dict[var_B][(var_A, lag_A - lag_B)] = new_link + + + elif lag_A == lag_B: + + if verbosity >= 1: + print("{:10} ({},{:2}) {:3} ({},{:2}) ==> ({},{:2}) {:3} ({},{:2}) ".format("Writing:", var_A, lag_A - lag_B, self.graph_dict[var_B][(var_A, 0)], var_B, 0, var_A, 0, new_link, var_B, 0)) + #print("Replacing {:3} from ({},{:2}) to {} with {:3}".format(self.graph_dict[var_B][(var_A, 0)], var_A, 0, (var_B, 0), new_link)) + print("{:10} ({},{:2}) {:3} ({},{:2}) ==> ({},{:2}) {:3} ({},{:2}) ".format("Writing:", var_B, 0, self.graph_dict[var_A][(var_B, 0)], var_A, 0, var_B, 0, self._reverse_link(new_link), var_A, 0)) + #print("Replacing {:3} from ({},{:2}) to {} with {:3}".format(self.graph_dict[var_A][(var_B, 0)], var_B, 0, (var_A, 0), self._reverse_link(new_link))) + + self.graph_dict[var_B][(var_A, 0)] = new_link + self.graph_dict[var_A][(var_B, 0)] = self._reverse_link(new_link) + + else: + + if verbosity >= 1: + print("{:10} ({},{:2}) {:3} ({},{:2}) ==> ({},{:2}) {:3} ({},{:2}) ".format("Writing:", var_B, lag_B - lag_A, self.graph_dict[var_A][(var_B, lag_B - lag_A)], var_A, 0, var_B, lag_B - lag_A, self._reverse_link(new_link), var_A, 0)) + #print("Replacing {:3} from ({},{:2}) to {} with {:3}".format(self.graph_dict[var_A][(var_B, lag_B - lag_A)], var_B, lag_B - lag_A, (var_A, 0), self._reverse_link(new_link))) + + self.graph_dict[var_A][(var_B, lag_B - lag_A)] = self._reverse_link(new_link) + + + def _get_sepsets(self, A, B): + """For two non-adjacent nodes, get the their separating stored in self.sepsets.""" + + (var_A, lag_A) = A + (var_B, lag_B) = B + + def _shift(Z, lag_B): + return frozenset([(var, lag + lag_B) for (var, lag) in Z]) + + if lag_A < lag_B: + out = {(_shift(Z, lag_B), status) for (Z, status) in self.sepsets[var_B][(var_A, lag_A - lag_B)]} + elif lag_A > lag_B: + out = {(_shift(Z, lag_A), status) for (Z, status) in self.sepsets[var_A][(var_B, lag_B - lag_A)]} + else: + out = {(_shift(Z, lag_A), status) for (Z, status) in self.sepsets[max(var_A, var_B)][(min(var_A, var_B), 0)]} + + return out + + + def _initialize_full_graph(self): + """ + The function _get_na_pds_t() needs to know the future adjacencies of a given node, not only the non-future adjacencies that are + stored in self.graph_dict. To aid this, this function initializes the dictionary graph_full_dict: + + self.graph_full_dict[j][(i, -tau_i)] contains all adjacencies of (j, 0), in particular those for which tau_i < 0. 
+ """ + + # Build from an empty nested dictionary + self.graph_full_dict = {j: {} for j in range(self.N)} + + # Run through the entire nested dictionary self.graph_dict + for j in range(self.N): + for ((var, lag), link) in self.graph_dict[j].items(): + + if link != "": + # Add non-future adjacencies + self.graph_full_dict[j][(var, lag)] = link + + # Add the future adjacencies + if lag < 0: + self.graph_full_dict[var][(j, -lag)] = self._reverse_link(link) + + # Return nothing + return None + + + def _get_pair_key_and_new_link(self, A, B, link_AB): + """The link from A to B takes the form link_AB. Bring this information into a form appropriate for the output of rule applications""" + + (var_A, lag_A) = A + (var_B, lag_B) = B + + if lag_A <= lag_B: + return ((var_A, var_B, lag_A - lag_B), link_AB) + elif lag_A > lag_B: + return ((var_B, var_A, lag_B - lag_A), self._reverse_link(link_AB)) + + + def _match_link(self, pattern, link): + """Matches pattern including wildcards with link.""" + + if pattern == '' or link == '': + return True if pattern == link else False + else: + left_mark, middle_mark, right_mark = pattern + if left_mark != '*': + if left_mark == '+': + if link[0] not in ['<', 'o']: return False + else: + if link[0] != left_mark: return False + + if right_mark != '*': + if right_mark == '+': + if link[2] not in ['>', 'o']: return False + else: + if link[2] != right_mark: return False + + if middle_mark != '*' and link[1] != middle_mark: return False + + return True + + + def _dict2graph(self): + """Convert self.graph_dict to graph array of shape (N, N, self.tau_max + 1).""" + + graph = np.zeros((self.N, self.N, self.tau_max + 1), dtype='U3') + for j in range(self.N): + for adj in self.graph_dict[j]: + (i, lag_i) = adj + graph[i, j, abs(lag_i)] = self.graph_dict[j][adj] + + return graph + + + def _find_adj(self, graph, node, patterns, exclude=None, ignore_time_bounds=True): + """Find adjacencies of node matching patterns.""" + + # Setup + i, lag_i = node + if exclude is None: exclude = [] + if type(patterns) == str: + patterns = [patterns] + + # Init + adj = [] + # Find adjacencies going forward/contemp + for k, lag_ik in zip(*np.where(graph[i,:,:])): + matches = [self._match_link(patt, graph[i, k, lag_ik]) for patt in patterns] + if np.any(matches): + match = (k, lag_i + lag_ik) + if match not in adj and (k, lag_i + lag_ik) not in exclude and (-self.tau_max <= lag_i + lag_ik <= 0 or ignore_time_bounds): + adj.append(match) + + # Find adjacencies going backward/contemp + for k, lag_ki in zip(*np.where(graph[:,i,:])): + matches = [self._match_link(self._reverse_link(patt), graph[k, i, lag_ki]) for patt in patterns] + if np.any(matches): + match = (k, lag_i - lag_ki) + if match not in adj and (k, lag_i - lag_ki) not in exclude and (-self.tau_max <= lag_i - lag_ki <= 0 or ignore_time_bounds): + adj.append(match) + + return adj + + + def _is_match(self, graph, X, Y, pattern_ij): + """Check whether the link between X and Y agrees with pattern_ij""" + + (i, lag_i) = X + (j, lag_j) = Y + tauij = lag_j - lag_i + if abs(tauij) >= graph.shape[2]: + return False + return ((tauij >= 0 and self._match_link(pattern_ij, graph[i, j, tauij])) or + (tauij < 0 and self._match_link(self._reverse_link(pattern_ij), graph[j, i, abs(tauij)]))) + + + def _find_triples(self, pattern_ij, pattern_jk, pattern_ik): + """Find triples (i, lag_i), (j, lag_j), (k, lag_k) that match patterns.""" + + # Graph as array makes it easier to search forward AND backward in time + graph = self._dict2graph() + + # 
print(graph[:,:,0]) + # print(graph[:,:,1]) + # print("matching ", pattern_ij, pattern_jk, pattern_ik) + + matched_triples = [] + + for i in range(self.N): + # Set lag_i = 0 without loss of generality, will be adjusted at end + lag_i = 0 + adjacencies_i = self._find_adj(graph, (i, lag_i), pattern_ij) + # print(i, adjacencies_i) + for (j, lag_j) in adjacencies_i: + + adjacencies_j = self._find_adj(graph, (j, lag_j), pattern_jk, + exclude=[(i, lag_i)]) + # print(j, adjacencies_j) + for (k, lag_k) in adjacencies_j: + if self._is_match(graph, (i, lag_i), (k, lag_k), pattern_ik): + # Now use stationarity and shift triple such that the right-most + # node (on a line t=..., -2, -1, 0, 1, 2, ...) is at lag 0 + righmost_lag = max(lag_i, lag_j, lag_k) + match = ((i, lag_i - righmost_lag), + (j, lag_j - righmost_lag), + (k, lag_k - righmost_lag)) + largest_lag = min(lag_i - righmost_lag, lag_j - righmost_lag, lag_k - righmost_lag) + if match not in matched_triples and \ + -self.tau_max <= largest_lag <= 0: + matched_triples.append(match) + + return matched_triples + + + def _find_quadruples(self, pattern_ij, pattern_jk, pattern_ik, + pattern_il, pattern_jl, pattern_kl): + """Find quadruples (i, lag_i), (j, lag_j), (k, lag_k), (l, lag_l) that match patterns.""" + + # We assume this later + assert pattern_il != '' + + # Graph as array makes it easier to search forward AND backward in time + graph = self._dict2graph() + + matched_quadruples = [] + + # First get triple ijk + ijk_triples = self._find_triples(pattern_ij, pattern_jk, pattern_ik) + + for triple in ijk_triples: + # Unpack triple + (i, lag_i), (j, lag_j), (k, lag_k) = triple + + # Search through adjacencies + adjacencies = set(self._find_adj(graph, (i, lag_i), pattern_il, + exclude=[(j, lag_j), (k, lag_k)])) + if pattern_jl != '': + adjacencies = adjacencies.intersection(set( + self._find_adj(graph, (j, lag_j), pattern_jl, + exclude=[(i, lag_i), (k, lag_k)]))) + else: + adjacencies = set([adj for adj in adjacencies + if self._is_match(graph, (j, lag_j), adj, '')]) + + if pattern_kl != '': + adjacencies = adjacencies.intersection(set( + self._find_adj(graph, (k, lag_k), pattern_kl, + exclude=[(i, lag_i), (j, lag_j)]))) + else: + adjacencies = set([adj for adj in adjacencies + if self._is_match(graph, (k, lag_k), adj, '')]) + + for adj in adjacencies: + (l, lag_l) = adj + + # Now use stationarity and shift quadruple such that the right-most + # node (on a line t=..., -2, -1, 0, 1, 2, ...) 
is at lag 0 + righmost_lag = max(lag_i, lag_j, lag_k, lag_l) + match = ((i, lag_i - righmost_lag), + (j, lag_j - righmost_lag), + (k, lag_k - righmost_lag), + (l, lag_l - righmost_lag), + ) + largest_lag = min(lag_i - righmost_lag, + lag_j - righmost_lag, + lag_k - righmost_lag, + lag_l - righmost_lag, + ) + if match not in matched_quadruples and \ + -self.tau_max <= largest_lag <= 0: + matched_quadruples.append(match) + + return matched_quadruples + + + def _get_R4_discriminating_paths(self, triple, max_length = np.inf): + """Find all discriminating paths starting from triple""" + + def _search(path_taken, max_length): + + # Get the last visited node and its link to Y + last_node = path_taken[-1] + link_to_Y = self._get_link(last_node, path_taken[0]) + + # Base Case: If the current path is a discriminating path, return it as single entry of a list + if len(path_taken) > 3 and link_to_Y == "": + return [path_taken] + + # If the current path is not a discriminating path, continue the path + paths = [] + + if self._get_link(last_node, path_taken[-2])[0] == "<" and link_to_Y == "-->" and len(path_taken) < max_length: + + # Search through all adjacencies of the last node + for (var, lag) in self.graph_full_dict[last_node[0]].keys(): + + # Build the next node and get its link to the previous + next_node = (var, lag + last_node[1]) + next_link = self._get_link(next_node, last_node) + + # Check whether this node can be visited + if next_node[1] <= 0 and next_node[1] >= -self.tau_max and next_node not in path_taken and self._match_link("*->", next_link): + + # Recursive call + paths.extend(_search(path_taken[:] + [next_node], max_length)) + + # Return the list of discriminating paths + return paths + + # Unpack the triple + (W, V, Y) = triple + + # Return all discriminating paths starting at this triple + return _search([Y, V, W], max_length) + + + def _get_potentially_directed_uncovered_paths(self, start_node, end_node, initial_allowed_patterns): + """Find all potentiall directed uncoverged paths from start_node to end_node whose first link takes one the forms specified by + initial_allowed_patters""" + + assert start_node != end_node + + # Function for recursive search of potentially directed uncovered paths + def _search(end_node, path_taken, allowed_patterns): + + # List for outputting potentially directed uncovered paths + paths = [] + + # The last visited note becomes the new start_node + start_node = path_taken[-1] + + # Base case: End node has been reached + if start_node == end_node: + paths.append(path_taken) + + # Recursive build case + else: + # Run through the adjacencies of start_node + #for next_node in self.graph_full_dict[start_node[0]]: + for (var, lag) in self.graph_full_dict[start_node[0]].keys(): + + next_node = (var, lag + start_node[1]) + + # Consider only nodes that ... + # ... are within the allowed time frame + if next_node[1] < -self.tau_max or next_node[1] > 0: + continue + # ... have not been visited yet + if next_node in path_taken: + continue + # ... are non-adjacent to the node before start_node + if len(path_taken) >= 2 and self._get_link(path_taken[-2], next_node) != "": + continue + # ... 
whose link with start_node matches one of the allowed patters + link = self._get_link(start_node, next_node) + if not any([self._match_link(pattern = pattern, link = link) for pattern in allowed_patterns]): + continue + + # Determine the allowed patters for the next recursive call + if self._match_link(pattern='o*o', link=link): + new_allowed_patters = ["o*o", "o*>", "-*>"] + elif self._match_link(pattern='o*>', link=link) or self._match_link(pattern='-*>', link=link): + new_allowed_patters = ["-*>"] + + # Determine the new path taken + new_path_taken = path_taken[:] + [next_node] + + # Recursive call + paths.extend(_search(end_node, new_path_taken, new_allowed_patters)) + + # Output list of potentially directed uncovered paths + return paths + + # end def _search(end_node, path_taken, allowed_patterns) + + # Output potentially directed uncovered paths + paths = _search(end_node, [start_node], initial_allowed_patterns) + return [path for path in paths if len(path) > 2] + + + def _sort_search_set(self, search_set, reference_node): + """Sort the nodes in search_set by their values in self.pval_max_val with respect to the reference_node. Nodes with higher values + appear earlier""" + + sort_by_potential_Nones = [self._get_pval_max_val(node, reference_node) for node in search_set] + sort_by = [(np.abs(value) if value is not None else np.inf) for value in sort_by_potential_Nones] + + return [x for _, x in sorted(zip(sort_by, search_set), reverse = True)] + + def _get_pval_max_val(self, X, Y): + """Return the test statistic value of that independence test for X and Y which, among all such tests, has the largest p-value.""" + + if X[1] < 0 or X[0] < Y[0]: + return self.pval_max_val[Y[0]][X] + else: + return self.pval_max_val[X[0]][Y] + + def _delete_sepsets(self, X, Y): + """Delete all separating sets of X and Y. Y is assumed to be at lag 0""" + + # Unpack X and Y + (i, lag_i) = X + (j, lag_j) = Y + + assert lag_j == 0 + + # Save the sepset + if lag_i < 0 or i < j: + self.sepsets[j][X] = set() + else: + self.sepsets[i][Y] = set() + + def _dict_to_matrix(self, val_dict, tau_max, n_vars, default=1): + """Convert a dictionary to matrix format""" + + matrix = np.ones((n_vars, n_vars, tau_max + 1)) + matrix *= default + + for j in val_dict.keys(): + for link in val_dict[j].keys(): + k, tau = link + if tau == 0: + matrix[k, j, 0] = matrix[j, k, 0] = val_dict[j][link] + else: + matrix[k, j, abs(tau)] = val_dict[j][link] + return matrix
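The three-character link patterns used by _match_link, _find_triples and _find_quadruples above follow a small convention: '*' matches any mark, '+' matches a circle or arrowhead mark, and the empty pattern '' matches only an absent link. A minimal standalone sketch (illustrative only, mirroring the logic of _match_link shown above):

def match_link(pattern, link):
    # Illustrative restatement of the _match_link convention
    # The empty pattern matches only the absent link ""
    if pattern == '' or link == '':
        return pattern == link
    left, middle, right = pattern
    # '*' is a wildcard; '+' accepts a circle or an arrowhead at that end
    ok_left = left == '*' or (left == '+' and link[0] in ['<', 'o']) or link[0] == left
    ok_middle = middle == '*' or link[1] == middle
    ok_right = right == '*' or (right == '+' and link[2] in ['>', 'o']) or link[2] == right
    return ok_left and ok_middle and ok_right

print(match_link('o*>', 'o->'))   # True: 'o' matches, '*' matches the middle mark, '>' matches
print(match_link('-*>', 'o->'))   # False: the first mark 'o' does not match '-'
print(match_link('+*>', '<->'))   # True: '+' accepts '<' or 'o' as the first mark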
+ + +if __name__ == '__main__': + + from tigramite.independence_tests import ParCorr + import tigramite.data_processing as pp + from tigramite.toymodels import structural_causal_processes as toys + import tigramite.plotting as tp + from matplotlib import pyplot as plt + + # Example process to play around with + # Each key refers to a variable and the incoming links are supplied + # as a list of format [((var, -lag), coeff, function), ...] + def lin_f(x): return x + def nonlin_f(x): return (x + 5. * x ** 2 * np.exp(-x ** 2 / 20.)) + + links = {0: [((0, -1), 0.9, lin_f), ((3, -1), -0.6, lin_f)], + 1: [((1, -1), 0.9, lin_f), ((3, -1), 0.6, lin_f)], + 2: [((2, -1), 0.9, lin_f), ((1, -1), 0.6, lin_f)], + 3: [], + } + + full_data, nonstat = toys.structural_causal_process(links, + T=1000, seed=7) + + # We now remove variable 3 which plays the role of a hidden confounder + data = full_data[:, [0, 1, 2]] + + # Data must be array of shape (time, variables) + print(data.shape) + dataframe = pp.DataFrame(data) + cond_ind_test = ParCorr() + pcmci = LPCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test) + results = pcmci.run_lpcmci_experimental(tau_max=2, pc_alpha=0.01) + + # For a proper causal interpretation of the graph see the paper! + print(results['graph']) + tp.plot_graph(graph=results['graph'], val_matrix=results['val_matrix']) + plt.show() + +
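Continuing the example above, a short illustrative sketch (an assumption of this note, not part of the patch) of listing the estimated links directly from results['graph'], assuming it is a string array of shape (N, N, tau_max + 1) with entries such as '-->' or 'o->' and '' for absent links, as produced by _dict2graph and printed by _print_graph_dict above:

# 'results' as returned by pcmci.run_lpcmci_experimental(...) in the example above
graph = results['graph']
N, _, tau_max_plus_one = graph.shape
for i in range(N):
    for j in range(N):
        for tau in range(tau_max_plus_one):
            # Skip absent links and print each contemporaneous link only once
            if graph[i, j, tau] != "" and (tau > 0 or i < j):
                print("({},{:2}) {} ({}, 0)".format(i, -tau, graph[i, j, tau], j))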
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/_build/html/_modules/tigramite/models.html b/docs/_build/html/_modules/tigramite/models.html
new file mode 100644
index 00000000..093f235e
--- /dev/null
+++ b/docs/_build/html/_modules/tigramite/models.html
@@ -0,0 +1,1695 @@
+[Sphinx HTML page header stripped: "tigramite.models — Tigramite 5.0 documentation"]
+Source code for tigramite.models
+"""Tigramite causal discovery for time series."""
+
+# Author: Jakob Runge <jakob@jakob-runge.com>
+#
+# License: GNU General Public License v3.0
+
+from __future__ import print_function
+from copy import deepcopy
+
+import numpy as np
+
+from tigramite.data_processing import DataFrame
+from tigramite.pcmci import PCMCI
+
+# try:
+import sklearn
+import sklearn.linear_model
+# except:
+#     print("Could not import sklearn...")
+
+try:
+    import networkx
+except:
+    print("Could not import networkx, LinearMediation plots not possible...")
+
+
+
[docs]class Models(): + """Base class for time series models. + + Allows to fit any model from sklearn to the parents of a target variable. + Also takes care of missing values, masking and preprocessing. + + Parameters + ---------- + dataframe : data object + Tigramite dataframe object. It must have the attributes dataframe.values + yielding a numpy array of shape (observations T, variables N) and + optionally a mask of the same shape and a missing values flag. + model : sklearn model object + For example, sklearn.linear_model.LinearRegression() for a linear + regression model. + conditional_model : sklearn model object, optional (default: None) + Used to fit conditional causal effects in nested regression. + If None, model is used. + data_transform : sklearn preprocessing object, optional (default: None) + Used to transform data prior to fitting. For example, + sklearn.preprocessing.StandardScaler for simple standardization. The + fitted parameters are stored. + mask_type : {None, 'y','x','z','xy','xz','yz','xyz'} + Masking mode: Indicators for which variables in the dependence + measure I(X; Y | Z) the samples should be masked. If None, the mask + is not used. Explained in tutorial on masking and missing values. + verbosity : int, optional (default: 0) + Level of verbosity. + """ + + def __init__(self, + dataframe, + model, + conditional_model=None, + data_transform=sklearn.preprocessing.StandardScaler(), + mask_type=None, + verbosity=0): + # Set the mask type and dataframe object + self.mask_type = mask_type + self.dataframe = dataframe + # Get the number of nodes for this dataset + self.N = self.dataframe.values.shape[1] + # Set the model to be used + self.model = model + if conditional_model is None: + self.conditional_model = model + else: + self.conditional_model = conditional_model + # Set the data_transform object and verbosity + self.data_transform = data_transform + self.verbosity = verbosity + # Initialize the object that will be set later + self.all_parents = None + self.selected_variables = None + self.tau_max = None + self.fit_results = None + +
[docs] def get_general_fitted_model(self, + Y, X, Z=None, + conditions=None, + tau_max=None, + cut_off='max_lag_or_tau_max', + return_data=False): + """Fit time series model. + + For each variable in selected_variables, the sklearn model is fitted + with :math:`y` given by the target variable, and :math:`X` given by its + parents. The fitted model class is returned for later use. + + Parameters + ---------- + X, Y, Z : lists of tuples + List of variables for estimating model Y = f(X,Z) + conditions : list of tuples. + Conditions for estimating conditional causal effects. + tau_max : int, optional (default: None) + Maximum time lag. If None, the maximum lag in all_parents is used. + cut_off : {'max_lag_or_tau_max', '2xtau_max', 'max_lag'} + How many samples to cutoff at the beginning. The default is + 'max_lag_or_tau_max', which uses the maximum of tau_max and the + conditions. This is useful to compare multiple models on the same + sample. Other options are '2xtau_max', which guarantees that MCI + tests are all conducted on the same samples. Last, 'max_lag' uses + as much samples as possible. + return_data : bool, optional (default: False) + Whether to save the data array. + + Returns + ------- + fit_results : dictionary of sklearn model objects for each variable + Returns the sklearn model after fitting. Also returns the data + transformation parameters. + """ + + self.X = X + self.Y = Y + + if conditions is None: + conditions = [] + self.conditions = conditions + + if Z is not None: + Z = [z for z in Z if z not in conditions] + + self.Z = Z + + self.cut_off = cut_off + + # Find the maximal conditions lag + max_lag = 0 + for y in self.Y: + this_lag = np.abs(np.array(self.X + self.Z + self.conditions)[:, 1]).max() + max_lag = max(max_lag, this_lag) + # Set the default tau max and check if it should be overwritten + if tau_max is None: + self.tau_max = max_lag + else: + self.tau_max = tau_max + if self.tau_max < max_lag: + raise ValueError("tau_max = %d, but must be at least " + " max_lag = %d" + "" % (self.tau_max, max_lag)) + + # Initialize the fit results + fit_results = {} + for y in self.Y: + # Construct array of shape (var, time) with first entry being + # a dummy, second is y followed by joint X and Z (ignore the notation in construct_array) + array, xyz = \ + self.dataframe.construct_array(X=self.X, Y=[y] + self.Z, Z=self.conditions, + tau_max=self.tau_max, + mask_type=self.mask_type, + cut_off=self.cut_off, + verbosity=self.verbosity) + + + # Transform the data if needed + if self.data_transform is not None: + array = self.data_transform.fit_transform(X=array.T).T + + # Cache array for use in prediction + self.observation_array = array + self.xyz = xyz + # Fit the model + # Copy and fit the model + a_model = deepcopy(self.model) + + predictor_indices = list(np.where(xyz==0)[0]) \ + + list(np.where(xyz==1)[0][1:]) \ + + list(np.where(xyz==2)[0]) + predictor_array = array[predictor_indices, :].T + # Target is only first entry of Y, ie [y] + target_array = array[np.where(xyz==1)[0][0], :] + + a_model.fit(X=predictor_array, y=target_array) + # Cache the results + fit_results[y] = {} + fit_results[y]['model'] = a_model + # Cache the data transform + fit_results[y]['data_transform'] = deepcopy(self.data_transform) + # Cache the data if needed + if return_data: + fit_results[y]['data'] = array + + # Cache and return the fit results + self.fit_results = fit_results + return fit_results
+
+
[docs] def get_general_prediction(self, + intervention_data, + conditions_data=None, + pred_params=None, + ): + r"""Predict effect of intervention with fitted model. + + Uses the model.predict() function of the sklearn model. + + Parameters + ---------- + intervention_data : numpy array + Numpy array of shape (time, len(X)) that contains the do(X) values. + conditions_data : data object, optional + Numpy array of shape (time, len(S)) that contains the S=s values. + pred_params : dict, optional + Optional parameters passed on to sklearn prediction function. + + Returns + ------- + Results from prediction. + """ + + intervention_T, lenX = intervention_data.shape + + if intervention_data.shape[1] != len(self.X): + raise ValueError("intervention_data.shape[1] must be len(X).") + + if conditions_data is not None: + if conditions_data.shape[1] != len(self.conditions): + raise ValueError("conditions_data.shape[1] must be len(S).") + if conditions_data.shape[0] != intervention_data.shape[0]: + raise ValueError("conditions_data.shape[0] must match intervention_data.shape[0].") + + lenS = len(self.conditions) + + lenY = len(self.Y) + + predicted_array = np.zeros((intervention_T, lenY)) + pred_dict = {} + for iy, y in enumerate(self.Y): + # Print message + if self.verbosity > 1: + print("\n## Predicting target %s" % str(y)) + if pred_params is not None: + for key in list(pred_params): + print("%s = %s" % (key, pred_params[key])) + # Default value for pred_params + if pred_params is None: + pred_params = {} + # Check this is a valid target + if y not in self.fit_results: + raise ValueError("y = %s not yet fitted" % str(y)) + + # Transform the data if needed + a_transform = self.fit_results[y]['data_transform'] + if a_transform is not None: + intervention_data = a_transform.transform(X=intervention_data) + if self.conditions is not None and conditions_data is not None: + conditions_data = a_transform.transform(X=conditions_data) + + # Extract observational Z from stored array + z_indices = list(np.where(self.xyz==1)[0][1:]) + z_array = self.observation_array[z_indices, :].T + Tobs = len(z_array) + + if self.conditions is not None and conditions_data is not None: + s_indices = list(np.where(self.xyz==2)[0]) + s_array = self.observation_array[s_indices, :].T + + # Now iterate through interventions (and potentially S) + for index, dox_vals in enumerate(intervention_data): + # Construct XZS-array + intervention_array = dox_vals.reshape(1, lenX) * np.ones((Tobs, lenX)) + if self.conditions is not None and conditions_data is not None: + conditions_array = conditions_data[index].reshape(1, lenS) * np.ones((Tobs, lenS)) + predictor_array = np.hstack((intervention_array, z_array, conditions_array)) + else: + predictor_array = np.hstack((intervention_array, z_array)) + + predicted_vals = self.fit_results[y]['model'].predict( + X=predictor_array, **pred_params) + + if self.conditions is not None and conditions_data is not None: + + # if a_transform is not None: + # predicted_vals = a_transform.transform(X=target_array.T).T + a_conditional_model = deepcopy(self.conditional_model) + + a_conditional_model.fit(X=s_array, y=predicted_vals) + self.fit_results[y]['conditional_model'] = a_conditional_model + + predicted_array[index, iy] = a_conditional_model.predict( + X=conditions_array, **pred_params).mean() + + else: + predicted_array[index, iy] = predicted_vals.mean() + + return predicted_array
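A hypothetical end-to-end sketch of the two methods above on toy data; the data-generating coefficient, the choice of X, Y, Z, and data_transform=None are illustrative assumptions, not part of this patch:

import numpy as np
import sklearn.linear_model
import tigramite.data_processing as pp
from tigramite.models import Models

np.random.seed(42)
data = np.random.randn(500, 3)
data[1:, 2] += 0.5 * data[:-1, 0]    # variable 0 at lag -1 drives variable 2

model = Models(dataframe=pp.DataFrame(data),
               model=sklearn.linear_model.LinearRegression(),
               data_transform=None)  # keep the sketch in the original data scale
model.get_general_fitted_model(Y=[(2, 0)], X=[(0, -1)], Z=[(1, 0)], tau_max=1)

# Estimate the interventional expectation of Y on a grid of do(X) values;
# intervention_data must have shape (time, len(X))
dox_values = np.linspace(-1., 1., 5).reshape(5, 1)
predicted = model.get_general_prediction(intervention_data=dox_values)
print(predicted.shape)               # (5, 1): one row per intervention, one column per target in Y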
+ + + # def get_general_prediction(self, + # intervention_data=None, + # conditions_data=None, + # pred_params=None, + # ): + # r"""Predict effect of intervention with fitted model. + + # Uses the model.predict() function of the sklearn model. + + # Parameters + # ---------- + # intervention_data : data object, optional + # Tigramite dataframe object with optional new mask. Only the + # values for X will be extracted. + # conditions_data : data object, optional + # Tigramite dataframe object with optional new mask. Only the + # values for conditions will be extracted. + # pred_params : dict, optional + # Optional parameters passed on to sklearn prediction function. + + # Returns + # ------- + # Results from prediction. + # """ + + # pred_dict = {} + # for y in self.Y: + # # Print message + # if self.verbosity > 1: + # print("\n## Predicting target %s" % str(y)) + # if pred_params is not None: + # for key in list(pred_params): + # print("%s = %s" % (key, pred_params[key])) + # # Default value for pred_params + # if pred_params is None: + # pred_params = {} + # # Check this is a valid target + # if y not in self.fit_results: + # raise ValueError("y = %s not yet fitted" % str(y)) + # # Construct the array form of the data + # # Check if we've passed a new dataframe object + # observation_array, xyz = \ + # self.dataframe.construct_array(X=self.X, Y=[y] + self.Z, Z=self.conditions, + # tau_max=self.tau_max, + # # mask=self.test_mask, + # mask_type=self.mask_type, + # cut_off=self.cut_off, + # verbosity=self.verbosity) + + # intervention_array = np.copy(observation_array) + # if intervention_data is not None: + # tmp_array, _ = intervention_data.construct_array(X=self.X, Y=[y] + self.Z, + # Z=self.conditions, + # tau_max=self.tau_max, + # mask_type=self.mask_type, + # cut_off=self.cut_off, + # verbosity=self.verbosity) + + # # Only replace X-variables in intervention_array (necessary if lags of + # # X are in Z...) + # for index in np.where(xyz==0)[0]: + # intervention_array[index] = tmp_array[index] + + # if self.conditions is not None and conditions_data is not None: + # tmp_array, _ = conditions_data.construct_array(X=self.X, Y=[y] + self.Z, + # Z=self.conditions, + # tau_max=self.tau_max, + # mask_type=self.mask_type, + # cut_off=self.cut_off, + # verbosity=self.verbosity) + + # # Only replace condition-variables in intervention_array + # # (necessary if lags of X are in Z...) 
+ # for index in np.where(xyz==2)[0]: + # intervention_array[index] = tmp_array[index] + + # # Transform the data if needed + # a_transform = self.fit_results[y]['data_transform'] + # if a_transform is not None: + # intervention_array = a_transform.transform(X=intervention_array.T).T + # # Cache the test array + # self.intervention_array = intervention_array + # # Run the predictor, for Y only the Z-part is used, the first index is y + # predictor_indices = list(np.where(xyz==0)[0]) \ + # + list(np.where(xyz==1)[0][1:]) \ + # + list(np.where(xyz==2)[0]) + # predictor_array = intervention_array[predictor_indices, :].T + + # pred_dict[y] = self.fit_results[y]['model'].predict( + # X=predictor_array, **pred_params) + + # # print(pred_dict[y]) + # if self.conditions is not None and conditions_data is not None: + + # a_conditional_model = deepcopy(self.conditional_model) + + # # Fit Y|do(X) on S + # conditions_array = observation_array[list(np.where(xyz==2)[0])] + # target_array = pred_dict[y] # array[np.where(xyz==1)[0][0], :] + + # if a_transform is not None: + # conditions_array = a_transform.transform(X=conditions_array.T).T + # target_array = a_transform.transform(X=target_array.T).T + + # a_conditional_model.fit(X=conditions_array.T, y=target_array) + # self.fit_results[y]['conditional_model'] = a_conditional_model + + # # Now predict conditional causal effect for new conditions + # tmp_array, _ = conditions_data.construct_array(X=self.X, Y=[y] + self.Z, + # Z=self.conditions, + # tau_max=self.tau_max, + # mask_type=self.mask_type, + # cut_off=self.cut_off, + # verbosity=self.verbosity) + + # # Construct conditions array + # new_conditions_array = tmp_array[list(np.where(xyz==2)[0])] + + # if a_transform is not None: + # new_conditions_array = a_transform.transform(X=new_conditions_array.T).T + + # pred_dict[y] = a_conditional_model.predict( + # X=new_conditions_array.T, **pred_params) + + # return pred_dict + + +
[docs] def get_fit(self, all_parents, + selected_variables=None, + tau_max=None, + cut_off='max_lag_or_tau_max', + return_data=False): + """Fit time series model. + + For each variable in selected_variables, the sklearn model is fitted + with :math:`y` given by the target variable, and :math:`X` given by its + parents. The fitted model class is returned for later use. + + Parameters + ---------- + all_parents : dictionary + Dictionary of form {0:[(0, -1), (3, 0), ...], 1:[], ...} containing + the parents estimated with PCMCI. + selected_variables : list of integers, optional (default: range(N)) + Specify to estimate parents only for selected variables. If None is + passed, parents are estimated for all variables. + tau_max : int, optional (default: None) + Maximum time lag. If None, the maximum lag in all_parents is used. + cut_off : {'max_lag_or_tau_max', '2xtau_max', 'max_lag'} + How many samples to cutoff at the beginning. The default is + 'max_lag_or_tau_max', which uses the maximum of tau_max and the + conditions. This is useful to compare multiple models on the same + sample. Other options are '2xtau_max', which guarantees that MCI + tests are all conducted on the same samples. Last, 'max_lag' uses + as much samples as possible. + return_data : bool, optional (default: False) + Whether to save the data array. + + Returns + ------- + fit_results : dictionary of sklearn model objects for each variable + Returns the sklearn model after fitting. Also returns the data + transformation parameters. + """ + # Initialize the fit by setting the instance's all_parents attribute + self.all_parents = all_parents + # Set the default selected variables to all variables and check if this + # should be overwritten + self.selected_variables = range(self.N) + if selected_variables is not None: + self.selected_variables = selected_variables + # Find the maximal parents lag + max_parents_lag = 0 + for j in self.selected_variables: + if all_parents[j]: + this_parent_lag = np.abs(np.array(all_parents[j])[:, 1]).max() + max_parents_lag = max(max_parents_lag, this_parent_lag) + # Set the default tau_max and check if it should be overwritten + self.tau_max = max_parents_lag + if tau_max is not None: + self.tau_max = tau_max + if self.tau_max < max_parents_lag: + raise ValueError("tau_max = %d, but must be at least " + " max_parents_lag = %d" + "" % (self.tau_max, max_parents_lag)) + # Initialize the fit results + fit_results = {} + for j in self.selected_variables: + Y = [(j, 0)] + X = [(j, 0)] # dummy + Z = self.all_parents[j] + array, xyz = \ + self.dataframe.construct_array(X, Y, Z, + tau_max=self.tau_max, + mask_type=self.mask_type, + cut_off=cut_off, + verbosity=self.verbosity) + # Get the dimensions out of the constructed array + dim, T = array.shape + dim_z = dim - 2 + # Transform the data if needed + if self.data_transform is not None: + array = self.data_transform.fit_transform(X=array.T).T + # Fit the model if there are any parents for this variable to fit + if dim_z > 0: + # Copy and fit the model + a_model = deepcopy(self.model) + a_model.fit(X=array[2:].T, y=array[1]) + # Cache the results + fit_results[j] = {} + fit_results[j]['model'] = a_model + # Cache the data transform + fit_results[j]['data_transform'] = deepcopy(self.data_transform) + # Cache the data if needed + if return_data: + fit_results[j]['data'] = array + # If there are no parents, skip this variable + else: + fit_results[j] = None + + # Cache and return the fit results + self.fit_results = fit_results + return fit_results
+
+
[docs] def get_coefs(self): + """Returns dictionary of coefficients for linear models. + + Only for models from sklearn.linear_model + + Returns + ------- + coeffs : dictionary + Dictionary of dictionaries for each variable with keys given by the + parents and the regression coefficients as values. + """ + coeffs = {} + for j in self.selected_variables: + coeffs[j] = {} + for ipar, par in enumerate(self.all_parents[j]): + coeffs[j][par] = self.fit_results[j]['model'].coef_[ipar] + return coeffs
+
+
[docs] def get_val_matrix(self): + """Returns the coefficient array for different lags for linear model. + + Requires fit_model() before. An entry val_matrix[i,j,tau] gives the + coefficient of the link from i to j at lag tau, including tau=0. + + Returns + ------- + val_matrix : array-like, shape (N, N, tau_max + 1) + Array of coefficients for each time lag, including lag-zero. + """ + + coeffs = self.get_coefs() + val_matrix = np.zeros((self.N, self.N, self.tau_max + 1, )) + + for j in list(coeffs): + for par in list(coeffs[j]): + i, tau = par + val_matrix[i,j,abs(tau)] = coeffs[j][par] + + return val_matrix
+
+
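A hypothetical sketch of the parent-based interface above (get_fit, get_coefs, get_val_matrix) on a two-variable toy model; the coefficient 0.6 and the parent sets are illustrative assumptions:

import numpy as np
import sklearn.linear_model
import tigramite.data_processing as pp
from tigramite.models import Models

np.random.seed(0)
data = np.random.randn(500, 2)
data[1:, 1] += 0.6 * data[:-1, 0]    # variable 0 at lag -1 drives variable 1

med = Models(dataframe=pp.DataFrame(data),
             model=sklearn.linear_model.LinearRegression(),
             data_transform=None)
med.get_fit(all_parents={0: [(0, -1)], 1: [(0, -1)]}, tau_max=1)

val_matrix = med.get_val_matrix()
print(val_matrix[0, 1, 1])           # should be close to the generating coefficient 0.6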
[docs]class LinearMediation(Models): + r"""Linear mediation analysis for time series models. + + Fits linear model to parents and provides functions to return measures such + as causal effect, mediated causal effect, average causal effect, etc. as + described in [4]_. + + Notes + ----- + This class implements the following causal mediation measures introduced in + [4]_: + + * causal effect (CE) + * mediated causal effect (MCE) + * average causal effect (ACE) + * average causal susceptibility (ACS) + * average mediated causal effect (AMCE) + + Consider a simple model of a causal chain as given in the Example with + + .. math:: X_t &= \eta^X_t \\ + Y_t &= 0.5 X_{t-1} + \eta^Y_t \\ + Z_t &= 0.5 Y_{t-1} + \eta^Z_t + + Here the link coefficient of :math:`X_{t-2} \to Z_t` is zero while the + causal effect is 0.25. MCE through :math:`Y` is 0.25 implying that *all* + of the the CE is explained by :math:`Y`. ACE from :math:`X` is 0.37 since it + has CE 0.5 on :math:`Y` and 0.25 on :math:`Z`. + + Examples + -------- + >>> numpy.random.seed(42) + >>> links_coeffs = {0: [], 1: [((0, -1), 0.5)], 2: [((1, -1), 0.5)]} + >>> data, true_parents = toys.var_process(links_coeffs, T=1000) + >>> dataframe = pp.DataFrame(data) + >>> med = LinearMediation(dataframe=dataframe) + >>> med.fit_model(all_parents=true_parents, tau_max=3) + >>> print "Link coefficient (0, -2) --> 2: ", med.get_coeff( + i=0, tau=-2, j=2) + >>> print "Causal effect (0, -2) --> 2: ", med.get_ce(i=0, tau=-2, j=2) + >>> print "Mediated Causal effect (0, -2) --> 2 through 1: ", med.get_mce( + i=0, tau=-2, j=2, k=1) + >>> print "Average Causal Effect: ", med.get_all_ace() + >>> print "Average Causal Susceptibility: ", med.get_all_acs() + >>> print "Average Mediated Causal Effect: ", med.get_all_amce() + Link coefficient (0, -2) --> 2: 0.0 + Causal effect (0, -2) --> 2: 0.250648072987 + Mediated Causal effect (0, -2) --> 2 through 1: 0.250648072987 + Average Causal Effect: [ 0.36897445 0.25718002 0. ] + Average Causal Susceptibility: [ 0. 0.24365041 0.38250406] + Average Mediated Causal Effect: [ 0. 0.12532404 0. ] + + References + ---------- + .. [4] J. Runge et al. (2015): Identifying causal gateways and mediators in + complex spatio-temporal systems. + Nature Communications, 6, 8502. http://doi.org/10.1038/ncomms9502 + + Parameters + ---------- + dataframe : data object + Tigramite dataframe object. It must have the attributes dataframe.values + yielding a numpy array of shape (observations T, variables N) and + optionally a mask of the same shape and a missing values flag. + model_params : dictionary, optional (default: None) + Optional parameters passed on to sklearn model + data_transform : sklearn preprocessing object, optional (default: None) + Used to transform data prior to fitting. For example, + sklearn.preprocessing.StandardScaler for simple standardization. The + fitted parameters are stored. + mask_type : {None, 'y','x','z','xy','xz','yz','xyz'} + Masking mode: Indicators for which variables in the dependence + measure I(X; Y | Z) the samples should be masked. If None, the mask + is not used. Explained in tutorial on masking and missing values. + verbosity : int, optional (default: 0) + Level of verbosity. 
+ """ + + def __init__(self, + dataframe, + model_params=None, + data_transform=sklearn.preprocessing.StandardScaler(), + mask_type=None, + verbosity=0): + # Initialize the member variables to None + self.phi = None + self.psi = None + self.all_psi_k = None + + # Build the model using the parameters + if model_params is None: + model_params = {} + this_model = sklearn.linear_model.LinearRegression(**model_params) + Models.__init__(self, + dataframe=dataframe, + model=this_model, + data_transform=data_transform, + mask_type=mask_type, + verbosity=verbosity) + +
[docs] def fit_model(self, all_parents, tau_max=None): + """Fit linear time series model. + + Fits a sklearn.linear_model.LinearRegression model to the parents of + each variable and computes the coefficient matrices :math:`\Phi` and + :math:`\Psi` as described in [4]_. Does not accept + contemporaneous links. + + Parameters + ---------- + all_parents : dictionary + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing + the parents estimated with PCMCI. + tau_max : int, optional (default: None) + Maximum time lag. If None, the maximum lag in all_parents is used. + """ + for j in all_parents.keys(): + for parent in all_parents[j]: + var, lag = parent + if lag == 0: + raise ValueError("all_parents cannot contain " + "contemporaneous links for the LinearMediation" + " class. Use the optimal causal effects " + "class.") + + # Fit the model using the base class + self.fit_results = self.get_fit(all_parents=all_parents, + selected_variables=None, + tau_max=tau_max) + # Cache the results in the member variables + coeffs = self.get_coefs() + self.phi = self._get_phi(coeffs) + self.psi = self._get_psi(self.phi) + self.all_psi_k = self._get_all_psi_k(self.phi)
+ + def _check_sanity(self, X, Y, k=None): + """Checks validity of some parameters.""" + + if len(X) != 1 or len(Y) != 1: + raise ValueError("X must be of form [(i, -tau)] and Y = [(j, 0)], " + "but are X = %s, Y=%s" % (X, Y)) + + i, tau = X[0] + + if abs(tau) > self.tau_max: + raise ValueError("X must be of form [(i, -tau)] with" + " tau <= tau_max") + + if k is not None and (k < 0 or k >= self.N): + raise ValueError("k must be in [0, N)") + + def _get_phi(self, coeffs): + """Returns the linear coefficient matrices for different lags. + + Parameters + ---------- + coeffs : dictionary + Dictionary of coefficients for each parent. + + Returns + ------- + phi : array-like, shape (tau_max + 1, N, N) + Matrices of coefficients for each time lag. + """ + + phi = np.zeros((self.tau_max + 1, self.N, self.N)) + phi[0] = np.identity(self.N) + + for j in list(coeffs): + for par in list(coeffs[j]): + i, tau = par + phi[abs(tau), j, i] = coeffs[j][par] + + return phi + + def _get_psi(self, phi): + """Returns the linear causal effect matrices for different lags. + + Parameters + ---------- + phi : array-like + Coefficient matrices at different lags. + + Returns + ------- + psi : array-like, shape (tau_max + 1, N, N) + Matrices of causal effects for each time lag. + """ + + psi = np.zeros((self.tau_max + 1, self.N, self.N)) + + psi[0] = np.identity(self.N) + for n in range(1, self.tau_max + 1): + psi[n] = np.zeros((self.N, self.N)) + for s in range(1, n + 1): + psi[n] += np.dot(phi[s], psi[n - s]) + + return psi + + def _get_psi_k(self, phi, k): + """Returns the linear causal effect matrices excluding variable k. + + Parameters + ---------- + phi : array-like + Coefficient matrices at different lags. + k : int + Variable index to exclude causal effects through. + + Returns + ------- + psi_k : array-like, shape (tau_max + 1, N, N) + Matrices of causal effects excluding k. + """ + + psi_k = np.zeros((self.tau_max + 1, self.N, self.N)) + + psi_k[0] = np.identity(self.N) + phi_k = np.copy(phi) + phi_k[1:, k, :] = 0. + for n in range(1, self.tau_max + 1): + psi_k[n] = np.zeros((self.N, self.N)) + for s in range(1, n + 1): + psi_k[n] += np.dot(phi_k[s], psi_k[n - s]) + + return psi_k + + def _get_all_psi_k(self, phi): + """Returns the linear causal effect matrices excluding variables. + + Parameters + ---------- + phi : array-like + Coefficient matrices at different lags. + + Returns + ------- + all_psi_k : array-like, shape (N, tau_max + 1, N, N) + Matrices of causal effects where for each row another variable is + excluded. + """ + + all_psi_k = np.zeros((self.N, self.tau_max + 1, self.N, self.N)) + + for k in range(self.N): + all_psi_k[k] = self._get_psi_k(phi, k) + + return all_psi_k + +
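As a standalone numerical check of the recursion implemented in _get_psi() above (a sketch, not library code): for the causal chain X -> Y -> Z with coefficients 0.5, the lag-2 causal effect of X on Z comes out as 0.5 * 0.5 = 0.25, matching the class example.

import numpy as np

N, tau_max = 3, 2
phi = np.zeros((tau_max + 1, N, N))
phi[0] = np.identity(N)
phi[1, 1, 0] = 0.5  # X_{t-1} --> Y_t
phi[1, 2, 1] = 0.5  # Y_{t-1} --> Z_t

# psi[n] = sum_{s=1..n} phi[s] @ psi[n-s], as in _get_psi()
psi = np.zeros_like(phi)
psi[0] = np.identity(N)
for n in range(1, tau_max + 1):
    for s in range(1, n + 1):
        psi[n] += phi[s] @ psi[n - s]

print(psi[2, 2, 0])  # 0.25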
[docs] def get_val_matrix(self, ): + """Returns the matrix of linear coefficients. + + Requires fit_model() before. An entry val_matrix[i,j,tau] gives the + coefficient of the link from i to j at lag tau. Lag=0 is always set + to zero for LinearMediation, use Models class for contemporaneous + models. + + Returns + ------- + val_matrix : array + Matrix of linear coefficients, shape (N, N, tau_max + 1). + """ + return self.phi.transpose()
+ +
[docs] def net_to_tsg(self, row, lag, max_lag): + """Helper function to translate from network to time series graph.""" + return row * max_lag + lag
+ +
[docs] def tsg_to_net(self, node, max_lag): + """Helper function to translate from time series graph to network.""" + row = node // max_lag + lag = node % max_lag + return (row, -lag)
+ +
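The two helpers above flatten a (variable, lag) node into a single time series graph index; a quick standalone sketch of the round trip (max_lag chosen arbitrarily):

max_lag = 4
for row in range(3):
    for lag in range(max_lag):
        node = row * max_lag + lag                                  # net_to_tsg
        assert (node // max_lag, -(node % max_lag)) == (row, -lag)  # tsg_to_net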
[docs] def get_tsg(self, link_matrix, val_matrix=None, include_neighbors=False): + """Returns time series graph matrix. + + Constructs a matrix of shape (N*tau_max, N*tau_max) from link_matrix. + This matrix can be used for plotting the time series graph and analyzing + causal pathways. + + Parameters + ---------- + link_matrix : bool array-like + Matrix of significant links of shape (N, N, tau_max+1). + val_matrix : array_like, optional (default: None) + Matrix of shape (N, N, tau_max+1) containing test statistic values. + If given, its entries are used as the weights of the graph links. + include_neighbors : bool, optional (default: False) + Whether to include causal paths emanating from neighbors of i. + + Returns + ------- + tsg : array of shape (N*tau_max, N*tau_max) + Time series graph matrix. + """ + + N = len(link_matrix) + max_lag = link_matrix.shape[2] + 1 + + # Create TSG + tsg = np.zeros((N * max_lag, N * max_lag)) + for i, j, tau in np.column_stack(np.where(link_matrix)): + if tau > 0 or include_neighbors: + for t in range(max_lag): + link_start = self.net_to_tsg(i, t - tau, max_lag) + link_end = self.net_to_tsg(j, t, max_lag) + if (0 <= link_start and + (link_start % max_lag) <= (link_end % max_lag)): + if val_matrix is not None: + tsg[link_start, link_end] = val_matrix[i, j, tau] + else: + tsg[link_start, link_end] = 1 + return tsg
+ +
[docs] def get_mediation_graph_data(self, i, tau, j, include_neighbors=False): + r"""Returns link and node weights for mediation analysis. + + Returns array with non-zero entries for links that are on causal + paths between :math:`i` and :math:`j` at lag :math:`\tau`. + ``path_val_matrix`` contains the corresponding path coefficients and + ``path_node_array`` the MCE values. ``tsg_path_val_matrix`` contains the + corresponding values in the time series graph format. + + Parameters + ---------- + i : int + Index of cause variable. + tau : int + Lag of cause variable. + j : int + Index of effect variable. + include_neighbors : bool, optional (default: False) + Whether to include causal paths emanating from neighbors of i + + Returns + ------- + graph_data : dictionary + Dictionary of matrices for coloring mediation graph plots. + """ + + path_link_matrix = np.zeros((self.N, self.N, self.tau_max + 1)) + path_val_matrix = np.zeros((self.N, self.N, self.tau_max + 1)) + + # Get mediation of path variables + path_node_array = (self.psi.reshape(1, self.tau_max + 1, self.N, self.N) + - self.all_psi_k)[:, abs(tau), j, i] + + # Get involved links + val_matrix = self.phi.transpose() + link_matrix = val_matrix != 0. + + max_lag = link_matrix.shape[2] + 1 + + # include_neighbors = False because True would allow + # --> o -- motifs in networkx.all_simple_paths as paths, but + # these are blocked... + tsg = self.get_tsg(link_matrix, val_matrix=val_matrix, + include_neighbors=False) + + if include_neighbors: + # Add contemporaneous links only at source node + for m, n in zip(*np.where(link_matrix[:, :, 0])): + # print m,n + if m != n: + tsg[self.net_to_tsg(m, max_lag - tau - 1, max_lag), + self.net_to_tsg(n, max_lag - tau - 1, max_lag) + ] = val_matrix[m, n, 0] + + tsg_path_val_matrix = np.zeros(tsg.shape) + + graph = networkx.DiGraph(tsg) + pathways = [] + + for path in networkx.all_simple_paths(graph, + source=self.net_to_tsg(i, + max_lag - tau - 1, + max_lag), + target=self.net_to_tsg(j, + max_lag - 0 - 1, + max_lag)): + pathways.append([self.tsg_to_net(p, max_lag) for p in path]) + for ip, p in enumerate(path[1:]): + tsg_path_val_matrix[path[ip], p] = tsg[path[ip], p] + + k, tau_k = self.tsg_to_net(p, max_lag) + link_start = self.tsg_to_net(path[ip], max_lag) + link_end = self.tsg_to_net(p, max_lag) + delta_tau = abs(link_end[1] - link_start[1]) + path_val_matrix[link_start[0], + link_end[0], + delta_tau] = val_matrix[link_start[0], + link_end[0], + delta_tau] + + graph_data = {'path_node_array': path_node_array, + 'path_val_matrix': path_val_matrix, + 'tsg_path_val_matrix': tsg_path_val_matrix} + + return graph_data
+ +
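A self-contained sketch of pulling the mediation graph data for the chain example from the class docstring (toys.var_process and the explicit parents dictionary follow that example; the returned matrices are the ones described above for coloring mediation graph plots):

import numpy as np
import tigramite.data_processing as pp
from tigramite.models import LinearMediation
from tigramite.toymodels import structural_causal_processes as toys

np.random.seed(42)
links_coeffs = {0: [], 1: [((0, -1), 0.5)], 2: [((1, -1), 0.5)]}
data, _ = toys.var_process(links_coeffs, T=1000)

med = LinearMediation(dataframe=pp.DataFrame(data))
med.fit_model(all_parents={0: [], 1: [(0, -1)], 2: [(1, -1)]}, tau_max=3)

graph_data = med.get_mediation_graph_data(i=0, tau=-2, j=2)
print(sorted(graph_data))             # the three matrices described above
print(graph_data['path_node_array'])  # MCE of the path through each variable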
[docs] def get_coeff(self, i, tau, j): + """Returns link coefficient. + + This is the direct causal effect for a particular link (i, tau) --> j. + + Parameters + ---------- + i : int + Index of cause variable. + tau : int + Lag of cause variable. + j : int + Index of effect variable. + + Returns + ------- + coeff : float + """ + return self.phi[abs(tau), j, i]
+ +
[docs] def get_ce(self, i, tau, j): + """Returns the causal effect. + + This is the causal effect for the link (i, tau) --> j. + + Parameters + ---------- + i : int + Index of cause variable. + tau : int + Lag of cause variable. + j : int + Index of effect variable. + + Returns + ------- + ce : float + """ + return self.psi[abs(tau), j, i]
+ +
[docs] def get_ce_max(self, i, j): + """Returns the maximum causal effect across lags. + + This is the causal effect of i --> j at the lag of maximum absolute + causal effect. + + Parameters + ---------- + i : int + Index of cause variable. + j : int + Index of effect variable. + + Returns + ------- + ce : float + """ + argmax = np.abs(self.psi[1:, j, i]).argmax() + return self.psi[1:, j, i][argmax]
+ +
[docs] def get_mce(self, i, tau, j, k): + """Returns the mediated causal effect. + + This is the causal effect for i --> j minus the causal effect not going + through k. + + Parameters + ---------- + i : int + Index of cause variable. + tau : int + Lag of cause variable. + j : int + Index of effect variable. + k : int + Index of mediator variable. + + Returns + ------- + mce : float + """ + mce = self.psi[abs(tau), j, i] - self.all_psi_k[k, abs(tau), j, i] + return mce
+ +
[docs] def get_ace(self, i, lag_mode='absmax', exclude_i=True): + """Returns the average causal effect. + + This is the average causal effect (ACE) emanating from variable i to any + other variable. With lag_mode='absmax' this is based on the lag of + maximum CE for each pair. + + Parameters + ---------- + i : int + Index of cause variable. + lag_mode : {'absmax', 'all_lags'} + Lag mode. Either average across all lags between each pair or only + at the lag of maximum absolute causal effect. + exclude_i : bool, optional (default: True) + Whether to exclude causal effects on the variable itself at later + lags. + + Returns + ------- + ace :float + Average Causal Effect. + """ + + all_but_i = np.ones(self.N, dtype='bool') + if exclude_i: + all_but_i[i] = False + + if lag_mode == 'absmax': + return np.abs(self.psi[1:, all_but_i, i]).max(axis=0).mean() + elif lag_mode == 'all_lags': + return np.abs(self.psi[1:, all_but_i, i]).mean() + else: + raise ValueError("lag_mode = %s not implemented" % lag_mode)
+ +
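To connect the two lag modes to the class example: with the true coefficients of the chain, the causal effects of X on Y and Z over lags 1 to 3 give an 'absmax' ACE of 0.375 (close to the estimated 0.369 in the docstring) and an 'all_lags' ACE of 0.125. A small check of that arithmetic:

import numpy as np

# True |CE| of X on Y (first column) and Z (second column) at lags 1..3
ce = np.array([[0.5, 0.0],
               [0.0, 0.25],
               [0.0, 0.0]])
print(ce.max(axis=0).mean())  # 0.375 -> lag_mode='absmax'
print(ce.mean())              # 0.125 -> lag_mode='all_lags'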
[docs] def get_all_ace(self, lag_mode='absmax', exclude_i=True): + """Returns the average causal effect for all variables. + + This is the average causal effect (ACE) emanating from variable i to any + other variable. With lag_mode='absmax' this is based on the lag of + maximum CE for each pair. + + Parameters + ---------- + lag_mode : {'absmax', 'all_lags'} + Lag mode. Either average across all lags between each pair or only + at the lag of maximum absolute causal effect. + exclude_i : bool, optional (default: True) + Whether to exclude causal effects on the variable itself at later + lags. + + Returns + ------- + ace : array of shape (N,) + Average Causal Effect for each variable. + """ + + ace = np.zeros(self.N) + for i in range(self.N): + ace[i] = self.get_ace(i, lag_mode=lag_mode, exclude_i=exclude_i) + + return ace
+ +
[docs] def get_acs(self, j, lag_mode='absmax', exclude_j=True): + """Returns the average causal susceptibility. + + This is the Average Causal Susceptibility (ACS) affecting a variable j + from any other variable. With lag_mode='absmax' this is based on the lag + of maximum CE for each pair. + + Parameters + ---------- + j : int + Index of variable. + lag_mode : {'absmax', 'all_lags'} + Lag mode. Either average across all lags between each pair or only + at the lag of maximum absolute causal effect. + exclude_j : bool, optional (default: True) + Whether to exclude causal effects on the variable itself at previous + lags. + + Returns + ------- + acs : float + Average Causal Susceptibility. + """ + + all_but_j = np.ones(self.N, dtype='bool') + if exclude_j: + all_but_j[j] = False + + if lag_mode == 'absmax': + return np.abs(self.psi[1:, j, all_but_j]).max(axis=0).mean() + elif lag_mode == 'all_lags': + return np.abs(self.psi[1:, j, all_but_j]).mean() + else: + raise ValueError("lag_mode = %s not implemented" % lag_mode)
+ +
[docs] def get_all_acs(self, lag_mode='absmax', exclude_j=True): + """Returns the average causal susceptibility. + + This is the Average Causal Susceptibility (ACS) for each variable from + any other variable. With lag_mode='absmax' this is based on the lag of + maximum CE for each pair. + + Parameters + ---------- + lag_mode : {'absmax', 'all_lags'} + Lag mode. Either average across all lags between each pair or only + at the lag of maximum absolute causal effect. + exclude_j : bool, optional (default: True) + Whether to exclude causal effects on the variable itself at previous + lags. + + Returns + ------- + acs : array of shape (N,) + Average Causal Susceptibility. + """ + + acs = np.zeros(self.N) + for j in range(self.N): + acs[j] = self.get_acs(j, lag_mode=lag_mode, exclude_j=exclude_j) + + return acs
+ +
[docs] def get_amce(self, k, lag_mode='absmax', + exclude_k=True, exclude_self_effects=True): + """Returns the average mediated causal effect. + + This is the Average Mediated Causal Effect (AMCE) through a variable k. + With lag_mode='absmax' this is based on the lag of maximum CE for each + pair. + + Parameters + ---------- + k : int + Index of variable. + lag_mode : {'absmax', 'all_lags'} + Lag mode. Either average across all lags between each pair or only + at the lag of maximum absolute causal effect. + exclude_k : bool, optional (default: True) + Whether to exclude causal effects through the variable itself at + previous lags. + exclude_self_effects : bool, optional (default: True) + Whether to exclude causal self effects of variables on themselves. + + Returns + ------- + amce : float + Average Mediated Causal Effect. + """ + + all_but_k = np.ones(self.N, dtype='bool') + if exclude_k: + all_but_k[k] = False + N_new = self.N - 1 + else: + N_new = self.N + + if exclude_self_effects: + weights = np.identity(N_new) == False + else: + weights = np.ones((N_new, N_new), dtype='bool') + + if self.tau_max < 2: + raise ValueError("Mediation only nonzero for tau_max >= 2") + + all_mce = self.psi[2:, :, :] - self.all_psi_k[k, 2:, :, :] + # all_mce[:, range(self.N), range(self.N)] = 0. + + if lag_mode == 'absmax': + return np.average(np.abs(all_mce[:, all_but_k, :] + [:, :, all_but_k] + ).max(axis=0), weights=weights) + elif lag_mode == 'all_lags': + return np.abs(all_mce[:, all_but_k, :][:, :, all_but_k]).mean() + else: + raise ValueError("lag_mode = %s not implemented" % lag_mode)
+ +
[docs] def get_all_amce(self, lag_mode='absmax', + exclude_k=True, exclude_self_effects=True): + """Returns the average mediated causal effect. + + This is the Average Mediated Causal Effect (AMCE) through all variables. + With lag_mode='absmax' this is based on the lag of maximum CE for each + pair. + + Parameters + ---------- + lag_mode : {'absmax', 'all_lags'} + Lag mode. Either average across all lags between each pair or only + at the lag of maximum absolute causal effect. + exclude_k : bool, optional (default: True) + Whether to exclude causal effects through the variable itself at + previous lags. + exclude_self_effects : bool, optional (default: True) + Whether to exclude causal self effects of variables on themselves. + + Returns + ------- + amce : array of shape (N,) + Average Mediated Causal Effect. + """ + amce = np.zeros(self.N) + for k in range(self.N): + amce[k] = self.get_amce(k, + lag_mode=lag_mode, + exclude_k=exclude_k, + exclude_self_effects=exclude_self_effects) + + return amce
+ + +
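The AMCE value of roughly 0.125 for Y in the class example can be reproduced by hand: with exclude_k=True and exclude_self_effects=True only the ordered pairs (X, Z) and (Z, X) are averaged, and the MCE through Y is 0.25 for the first and 0 for the second:

# MCE through Y, maximized over lags, for the two remaining ordered pairs
mce_through_y = {('X', 'Z'): 0.25, ('Z', 'X'): 0.0}
print(sum(mce_through_y.values()) / len(mce_through_y))  # 0.125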
[docs]class Prediction(Models, PCMCI): + r"""Prediction class for time series models. + + Allows to fit and predict from any sklearn model. The optimal predictors can + be estimated using PCMCI. Also takes care of missing values, masking and + preprocessing. + + Parameters + ---------- + dataframe : data object + Tigramite dataframe object. It must have the attributes dataframe.values + yielding a numpy array of shape (observations T, variables N) and + optionally a mask of the same shape and a missing values flag. + train_indices : array-like + Either boolean array or time indices marking the training data. + test_indices : array-like + Either boolean array or time indices marking the test data. + prediction_model : sklearn model object + For example, sklearn.linear_model.LinearRegression() for a linear + regression model. + cond_ind_test : Conditional independence test object, optional + Only needed if predictors are estimated with causal algorithm. + The class will be initialized with masking set to the training data. + data_transform : sklearn preprocessing object, optional (default: None) + Used to transform data prior to fitting. For example, + sklearn.preprocessing.StandardScaler for simple standardization. The + fitted parameters are stored. + verbosity : int, optional (default: 0) + Level of verbosity. + """ + + def __init__(self, + dataframe, + train_indices, + test_indices, + prediction_model, + cond_ind_test=None, + data_transform=None, + verbosity=0): + + # Default value for the mask + mask = dataframe.mask + if mask is None: + mask = np.zeros(dataframe.values.shape, dtype='bool') + # Get the dataframe shape + T = len(dataframe.values) + # Have the default dataframe be the training data frame + train_mask = np.copy(mask) + train_mask[[t for t in range(T) if t not in train_indices]] = True + self.dataframe = DataFrame(dataframe.values, + mask=train_mask, + missing_flag=dataframe.missing_flag) + # Initialize the models baseclass with the training dataframe + Models.__init__(self, + dataframe=self.dataframe, + model=prediction_model, + data_transform=data_transform, + mask_type='y', + verbosity=verbosity) + + # Build the testing dataframe as well + self.test_mask = np.copy(mask) + self.test_mask[[t for t in range(T) if t not in test_indices]] = True + + # Setup the PCMCI instance + if cond_ind_test is not None: + # Force the masking + cond_ind_test.set_mask_type('y') + cond_ind_test.verbosity = verbosity + PCMCI.__init__(self, + dataframe=self.dataframe, + cond_ind_test=cond_ind_test, + selected_variables=None, + verbosity=verbosity) + + # Set the member variables + self.cond_ind_test = cond_ind_test + # Initialize member varialbes that are set outside + self.target_predictors = None + self.selected_targets = None + self.fitted_model = None + self.test_array = None + +
[docs] def get_predictors(self, + selected_targets=None, + selected_links=None, + steps_ahead=1, + tau_max=1, + pc_alpha=0.2, + max_conds_dim=None, + max_combinations=1): + """Estimate predictors using PC1 algorithm. + + Wrapper around PCMCI.run_pc_stable that estimates causal predictors. + The lead time can be specified by ``steps_ahead``. + + Parameters + ---------- + selected_targets : list of ints, optional (default: None) + List of variables to estimate predictors of. If None, predictors of + all variables are estimated. + selected_links : dict or None + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} + specifying whether only selected links should be tested. If None is + passed, all links are tested + steps_ahead : int, default: 1 + Minimum time lag to test. Useful for multi-step ahead predictions. + tau_max : int, default: 1 + Maximum time lag. Must be larger or equal to tau_min. + pc_alpha : float or list of floats, default: 0.2 + Significance level in algorithm. If a list or None is passed, the + pc_alpha level is optimized for every variable across the given + pc_alpha values using the score computed in + cond_ind_test.get_model_selection_criterion() + max_conds_dim : int or None + Maximum number of conditions to test. If None is passed, this number + is unrestricted. + max_combinations : int, default: 1 + Maximum number of combinations of conditions of current cardinality + to test. Defaults to 1 for PC_1 algorithm. For original PC algorithm + a larger number, such as 10, can be used. + + Returns + ------- + predictors : dict + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} + containing estimated predictors. + """ + # Ensure an independence model is given + if self.cond_ind_test is None: + raise ValueError("No cond_ind_test given!") + # Set the selected variables + self.selected_variables = range(self.N) + if selected_targets is not None: + self.selected_variables = selected_targets + predictors = self.run_pc_stable(selected_links=selected_links, + tau_min=steps_ahead, + tau_max=tau_max, + save_iterations=False, + pc_alpha=pc_alpha, + max_conds_dim=max_conds_dim, + max_combinations=max_combinations) + return predictors
+ +
[docs] def fit(self, target_predictors, + selected_targets=None, tau_max=None, return_data=False): + r"""Fit time series model. + + Wrapper around ``Models.get_fit()``. To each variable in + ``selected_targets``, the sklearn model is fitted with :math:`y` given + by the target variable, and :math:`X` given by its predictors. The + fitted model class is returned for later use. + + Parameters + ---------- + target_predictors : dictionary + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing + the predictors estimated with PCMCI. + selected_targets : list of integers, optional (default: range(N)) + Specify to fit model only for selected targets. If None is + passed, models are estimated for all variables. + tau_max : int, optional (default: None) + Maximum time lag. If None, the maximum lag in target_predictors is + used. + return_data : bool, optional (default: False) + Whether to save the data array. + + Returns + ------- + self : instance of self + """ + + self.target_predictors = target_predictors + + if selected_targets is None: + self.selected_targets = range(self.N) + else: + self.selected_targets = selected_targets + + for target in self.selected_targets: + if target not in list(self.target_predictors): + raise ValueError("No predictors given for target %s" % target) + + self.fitted_model = \ + self.get_fit(all_parents=self.target_predictors, + selected_variables=self.selected_targets, + tau_max=tau_max, + return_data=return_data) + return self
+ +
[docs] def predict(self, target, + new_data=None, + pred_params=None, + cut_off='max_lag_or_tau_max'): + r"""Predict target variable with fitted model. + + Uses the model.predict() function of the sklearn model. + + If target is an int, the predicted time series is returned. If target + is a list of integers, then a list of predicted time series is returned. + If the list of integers equals range(N), then an array of shape (T, N) + of the predicted series is returned. + + Parameters + ---------- + target : int or list of integers + Index or indices of target variable(s). + new_data : data object, optional + New Tigramite dataframe object with optional new mask. + pred_params : dict, optional + Optional parameters passed on to sklearn prediction function. + cut_off : {'2xtau_max', 'max_lag', 'max_lag_or_tau_max'} + How many samples to cutoff at the beginning. The default is + '2xtau_max', which guarantees that MCI tests are all conducted on + the same samples. For modeling, 'max_lag_or_tau_max' can be used, + which uses the maximum of tau_max and the conditions, which is + useful to compare multiple models on the same sample. Last, + 'max_lag' uses as much samples as possible. + + Returns + ------- + Results from prediction. + """ + + if isinstance(target, int): + target_list = [target] + elif isinstance(target, list): + target_list = target + else: + raise ValueError("target must be either int or list of integers " + "indicating the index of the variables to " + "predict.") + + if target_list == range(self.N): + return_type = 'array' + elif len(target_list) == 1: + return_type = 'series' + else: + return_type = 'list' + + pred_list = [] + for target in target_list: + # Print message + if self.verbosity > 0: + print("\n##\n## Predicting target %s\n##" % target) + if pred_params is not None: + for key in list(pred_params): + print("%s = %s" % (key, pred_params[key])) + # Default value for pred_params + if pred_params is None: + pred_params = {} + # Check this is a valid target + if target not in self.selected_targets: + raise ValueError("Target %s not yet fitted" % target) + # Construct the array form of the data + Y = [(target, 0)] + X = [(target, 0)] # dummy + Z = self.target_predictors[target] + # Check if we've passed a new dataframe object + test_array = None + if new_data is not None: + # if new_data.mask is None: + # # if no mask is supplied, use the same mask as for the fitted array + # new_data_mask = self.test_mask + # else: + new_data_mask = new_data.mask + test_array, _ = new_data.construct_array(X, Y, Z, + tau_max=self.tau_max, + mask=new_data_mask, + mask_type=self.mask_type, + cut_off=cut_off, + verbosity=self.verbosity) + # Otherwise use the default values + else: + test_array, _ = \ + self.dataframe.construct_array(X, Y, Z, + tau_max=self.tau_max, + mask=self.test_mask, + mask_type=self.mask_type, + cut_off=cut_off, + verbosity=self.verbosity) + # Transform the data if needed + a_transform = self.fitted_model[target]['data_transform'] + if a_transform is not None: + test_array = a_transform.transform(X=test_array.T).T + # Cache the test array + self.test_array = test_array + # Run the predictor + pred_list.append(self.fitted_model[target]['model'].predict( + X=test_array[2:].T, **pred_params)) + + if return_type == 'series': + return pred_list[0] + elif return_type == 'list': + return pred_list + elif return_type == 'array': + return np.array(pred_list).transpose()
+ +
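A compact end-to-end sketch of the three steps above (predictor selection, fitting, prediction), modeled on the __main__ block at the bottom of this module; the toy process, lags and train/test split are illustrative:

import sklearn.linear_model
import sklearn.preprocessing
import tigramite.data_processing as pp
from tigramite.models import Prediction
from tigramite.independence_tests import ParCorr
from tigramite.toymodels import structural_causal_processes as toys

def lin_f(x): return x

T = 1000
links = {0: [((0, -1), 0.8, lin_f)],
         1: [((1, -1), 0.8, lin_f), ((0, -1), 0.5, lin_f)]}
data, _ = toys.structural_causal_process(links, T=T)

pred = Prediction(dataframe=pp.DataFrame(data),
                  cond_ind_test=ParCorr(),
                  prediction_model=sklearn.linear_model.LinearRegression(),
                  data_transform=sklearn.preprocessing.StandardScaler(),
                  train_indices=range(int(0.8 * T)),
                  test_indices=range(int(0.8 * T), T),
                  verbosity=0)

predictors = pred.get_predictors(steps_ahead=1, tau_max=2, pc_alpha=0.2)
pred.fit(target_predictors=predictors, selected_targets=[1], tau_max=2)
predicted = pred.predict(target=1)
print(predicted[:5])  # predicted test-set values of variable 1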
[docs] def get_train_array(self, j): + """Returns training array.""" + return self.fitted_model[j]['data']
+ +
[docs] def get_test_array(self): + """Returns test array.""" + return self.test_array
+ +if __name__ == '__main__': + + import tigramite + import tigramite.data_processing as pp + from tigramite.toymodels import structural_causal_processes as toys + from tigramite.independence_tests import ParCorr + + np.random.seed(6) + + def lin_f(x): return x + + T = 10000 + links = {0: [((0, -1), 0.8, lin_f)], + 1: [((1, -1), 0.8, lin_f), ((0, -1), 0.5, lin_f)], + 2: [((2, -1), 0.8, lin_f), ((1, 0), -0.6, lin_f)]} + # noises = [np.random.randn for j in links.keys()] + data, nonstat = toys.structural_causal_process(links, T=T) + true_parents = toys._get_true_parent_neighbor_dict(links) + dataframe = pp.DataFrame(data) + + # med = Models(dataframe=dataframe, model=sklearn.linear_model.LinearRegression(), data_transform=None) + # # Fit the model + # med.get_fit(all_parents=true_parents, tau_max=3) + + # print(med.get_val_matrix()) + + # for j, i, tau, coeff in toys._iter_coeffs(links): + # print(i, j, tau, coeff, med.get_coeff(i=i, tau=tau, j=j)) + + # for causal_coeff in [med.get_ce(i=0, tau=-2, j=2), + # med.get_mce(i=0, tau=-2, j=2, k=1)]: + # print(causal_coeff) + + + pred = Prediction(dataframe=dataframe, + cond_ind_test=ParCorr(), #CMIknn ParCorr + prediction_model = sklearn.linear_model.LinearRegression(), + # prediction_model = sklearn.gaussian_process.GaussianProcessRegressor(), + # prediction_model = sklearn.neighbors.KNeighborsRegressor(), + data_transform=sklearn.preprocessing.StandardScaler(), + train_indices= range(int(0.8*T)), + test_indices= range(int(0.8*T), T), + verbosity=1 + ) + +
+ +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/_build/html/_modules/tigramite/pcmci.html b/docs/_build/html/_modules/tigramite/pcmci.html new file mode 100644 index 00000000..66dd3f68 --- /dev/null +++ b/docs/_build/html/_modules/tigramite/pcmci.html @@ -0,0 +1,4200 @@ + + + + + + + tigramite.pcmci — Tigramite 5.0 documentation + + + + + + + + + + + + + +
+
+
+
+ +

Source code for tigramite.pcmci

+"""Tigramite causal discovery for time series."""
+
+# Author: Jakob Runge <jakob@jakob-runge.com>
+#
+# License: GNU General Public License v3.0
+
+from __future__ import print_function
+import warnings
+import itertools
+from collections import defaultdict
+from copy import deepcopy
+import numpy as np
+import scipy.stats
+
+
+def _create_nested_dictionary(depth=0, lowest_type=dict):
+    """Create a series of nested dictionaries to a maximum depth.  The first
+    depth - 1 nested dictionaries are defaultdicts, the last is a normal
+    dictionary.
+
+    Parameters
+    ----------
+    depth : int
+        Maximum depth argument.
+    lowest_type: callable (optional)
+        Type contained in leaves of tree.  Ex: list, dict, tuple, int, float ...
+    """
+    new_depth = depth - 1
+    if new_depth <= 0:
+        return defaultdict(lowest_type)
+    return defaultdict(lambda: _create_nested_dictionary(new_depth))
+
+
+def _nested_to_normal(nested_dict):
+    """Transforms the nested default dictionary into standard dictionaries.
+
+    Parameters
+    ----------
+    nested_dict : default dictionary of default dictionaries of ... etc.
+    """
+    if isinstance(nested_dict, defaultdict):
+        nested_dict = {k: _nested_to_normal(v) for k, v in nested_dict.items()}
+    return nested_dict
+
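A tiny standalone illustration of the two helpers above; the keys mimic how the PC algorithm stores per-link iteration information:

from tigramite.pcmci import _create_nested_dictionary, _nested_to_normal

d = _create_nested_dictionary(depth=3)
d[0][(1, -1)]['conds'] = [(2, -1)]  # intermediate levels are created on access
print(_nested_to_normal(d))         # {0: {(1, -1): {'conds': [(2, -1)]}}}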
+
+
[docs]class PCMCI(): + r"""PCMCI causal discovery for time series datasets. + + PCMCI is a causal discovery framework for large-scale time series + datasets. This class contains several methods. The standard PCMCI method + addresses time-lagged causal discovery and is described in [1]_ where + also further sub-variants are discussed. Lagged as well as contemporaneous + causal discovery is addressed with PCMCIplus and described in [5]_. See the + tutorials for guidance in applying these methods. + + PCMCI has: + + * different conditional independence tests adapted to linear or + nonlinear dependencies, and continuously-valued or discrete data ( + implemented in ``tigramite.independence_tests``) + * (mostly) hyperparameter optimization + * easy parallelization (separate script) + * handling of masked time series data + * false discovery control and confidence interval estimation + + + Notes + ----- + + .. image:: mci_schematic.* + :width: 200pt + + In the PCMCI framework, the dependency structure of a set of time series + variables is represented in a *time series graph* as shown in the Figure. + The nodes of a time series graph are defined as the variables at + different times and a link indicates a conditional dependency that can be + interpreted as a causal dependency under certain assumptions (see paper). + Assuming stationarity, the links are repeated in time. The parents + :math:`\mathcal{P}` of a variable are defined as the set of all nodes + with a link towards it (blue and red boxes in Figure). + + The different PCMCI methods estimate causal links by iterative + conditional independence testing. PCMCI can be flexibly combined with + any kind of conditional independence test statistic adapted to the kind + of data (continuous or discrete) and its assumed dependency types. + These are available in ``tigramite.independence_tests``. + + NOTE: MCI test statistic values define a particular measure of causal + strength depending on the test statistic used. For example, ParCorr() + results in normalized values between -1 and 1. However, if you are + interested in quantifying causal effects, i.e., the effect of + hypothetical interventions, you may better look at the causal effect + estimation functionality of Tigramite. + + References + ---------- + + .. [1] J. Runge, P. Nowack, M. Kretschmer, S. Flaxman, D. Sejdinovic, + Detecting and quantifying causal associations in large nonlinear time + series datasets. Sci. Adv. 5, eaau4996 (2019) + https://advances.sciencemag.org/content/5/11/eaau4996 + + .. [5] J. Runge, + Discovering contemporaneous and lagged causal relations in + autocorrelated nonlinear time series datasets + http://www.auai.org/~w-auai/uai2020/proceedings/579_main_paper.pdf + + Parameters + ---------- + dataframe : data object + This is the Tigramite dataframe object. Among others, it has the + attributes dataframe.values yielding a numpy array of shape ( + observations T, variables N) and optionally a mask of the same shape. + cond_ind_test : conditional independence test object + This can be ParCorr or other classes from + ``tigramite.independence_tests`` or an external test passed as a + callable. This test can be based on the class + tigramite.independence_tests.CondIndTest. + selected_variables : list + Deprecated, just here to raise Error if not None. + verbosity : int, optional (default: 0) + Verbose levels 0, 1, ... 
+ + Attributes + ---------- + all_parents : dictionary + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing + the conditioning-parents estimated with PC algorithm. + val_min : dictionary + Dictionary of form val_min[j][(i, -tau)] = float + containing the minimum test statistic value for each link estimated in + the PC algorithm. + pval_max : dictionary + Dictionary of form pval_max[j][(i, -tau)] = float containing the maximum + p-value for each link estimated in the PC algorithm. + iterations : dictionary + Dictionary containing further information on algorithm steps. + N : int + Number of variables. + T : int + Time series sample length. + """ + + def __init__(self, dataframe, + cond_ind_test, + selected_variables=None, + verbosity=0): + # Set the data for this iteration of the algorithm + self.dataframe = dataframe + # Set the conditional independence test to be used + self.cond_ind_test = cond_ind_test + if isinstance(self.cond_ind_test, type): + raise ValueError("PCMCI requires that cond_ind_test " + "is instantiated, e.g. cond_ind_test = " + "ParCorr().") + self.cond_ind_test.set_dataframe(self.dataframe) + # Set the verbosity for debugging/logging messages + self.verbosity = verbosity + # Set the variable names + self.var_names = self.dataframe.var_names + + # Raise error if selected_variables is set + if selected_variables is not None: + raise ValueError("selected_variables is deprecated, use " + "the selected_links parameter in the respective " + "functions run_pcmci() etc.") + + # Store the shape of the data in the T and N variables + self.T = self.dataframe.T + self.N = self.dataframe.N + + def _set_sel_links(self, selected_links, tau_min, tau_max, + remove_contemp=False): + """Helper function to set and check the selected links argument + + Parameters + ---------- + selected_links : dict or None + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} + specifying whether only selected links should be tested. If None is + passed, all links are returned. + tau_mix : int + Minimum time delay to test. + tau_max : int + Maximum time delay to test. + remove_contemp : bool + Whether contemporaneous links (at lag zero) should be removed. + + Returns + ------- + selected_links : dict + Cleaned links. 
+ """ + # Copy and pass into the function + _int_sel_links = deepcopy(selected_links) + # Set the default selected links if none are set + _vars = list(range(self.N)) + _lags = list(range(-(tau_max), -tau_min + 1, 1)) + if _int_sel_links is None: + _int_sel_links = {} + # Set the default as all combinations of the selected variables + for j in _vars: + _int_sel_links[j] = [(var, -lag) for var in _vars + for lag in range(tau_min, tau_max + 1) + if not (var == j and lag == 0)] + else: + if remove_contemp: + for j in _int_sel_links.keys(): + _int_sel_links[j] = [link for link in _int_sel_links[j] + if link[1] != 0] + # Otherwise, check that our selection is sane + # Check that the selected links refer to links that are inside the + # data range + _key_set = set(_int_sel_links.keys()) + valid_entries = _key_set == set(range(self.N)) + + for link in _int_sel_links.values(): + if isinstance(link, list) and len(link) == 0: + continue + for var, lag in link: + if var not in _vars or lag not in _lags: + valid_entries = False + + if not valid_entries: + raise ValueError("selected_links" + " must be dictionary with keys for all [0,...,N-1]" + " variables and contain only links from " + "these variables in range [tau_min, tau_max]") + + # Return the selected links + return _int_sel_links + + def _iter_conditions(self, parent, conds_dim, all_parents): + """Yield next condition. + + Yields next condition from lexicographically ordered conditions. + + Parameters + ---------- + parent : tuple + Tuple of form (i, -tau). + conds_dim : int + Cardinality in current step. + all_parents : list + List of form [(0, -1), (3, -2), ...]. + + Yields + ------- + cond : list + List of form [(0, -1), (3, -2), ...] for the next condition. + """ + all_parents_excl_current = [p for p in all_parents if p != parent] + for cond in itertools.combinations(all_parents_excl_current, conds_dim): + yield list(cond) + + def _sort_parents(self, parents_vals): + """Sort current parents according to test statistic values. + + Sorting is from strongest to weakest absolute values. + + Parameters + --------- + parents_vals : dict + Dictionary of form {(0, -1):float, ...} containing the minimum test + statistic value of a link. + + Returns + ------- + parents : list + List of form [(0, -1), (3, -2), ...] containing sorted parents. + """ + if self.verbosity > 1: + print("\n Sorting parents in decreasing order with " + "\n weight(i-tau->j) = min_{iterations} |val_{ij}(tau)| ") + # Get the absolute value for all the test statistics + abs_values = {k: np.abs(parents_vals[k]) for k in list(parents_vals)} + return sorted(abs_values, key=abs_values.get, reverse=True) + + def _dict_to_matrix(self, val_dict, tau_max, n_vars, default=1): + """Helper function to convert dictionary to matrix format. + + Parameters + --------- + val_dict : dict + Dictionary of form {0:{(0, -1):float, ...}, 1:{...}, ...}. + tau_max : int + Maximum lag. + n_vars : int + Number of variables. + default : int + Default value for entries not part of val_dict. + + Returns + ------- + matrix : array of shape (N, N, tau_max+1) + Matrix format of p-values and test statistic values. + """ + matrix = np.ones((n_vars, n_vars, tau_max + 1)) + matrix *= default + + for j in val_dict.keys(): + for link in val_dict[j].keys(): + k, tau = link + matrix[k, j, abs(tau)] = val_dict[j][link] + return matrix + + def _print_link_info(self, j, index_parent, parent, num_parents, + already_removed=False): + """Print info about the current link being tested. 
+ + Parameters + ---------- + j : int + Index of current node being tested. + index_parent : int + Index of the current parent. + parent : tuple + Standard (i, tau) tuple of parent node id and time delay + num_parents : int + Total number of parents. + already_removed : bool + Whether parent was already removed. + """ + link_marker = {True:"o-o", False:"-->"} + + abstau = abs(parent[1]) + if self.verbosity > 1: + print("\n Link (%s % d) %s %s (%d/%d):" % ( + self.var_names[parent[0]], parent[1], link_marker[abstau==0], + self.var_names[j], + index_parent + 1, num_parents)) + + if already_removed: + print(" Already removed.") + + def _print_cond_info(self, Z, comb_index, pval, val): + """Print info about the condition + + Parameters + ---------- + Z : list + The current condition being tested. + comb_index : int + Index of the combination yielding this condition. + pval : float + p-value from this condition. + val : float + value from this condition. + """ + var_name_z = "" + for i, tau in Z: + var_name_z += "(%s % .2s) " % (self.var_names[i], tau) + if len(Z) == 0: var_name_z = "()" + print(" Subset %d: %s gives pval = %.5f / val = % .3f" % + (comb_index, var_name_z, pval, val)) + + def _print_a_pc_result(self, nonsig, conds_dim, max_combinations): + """Print the results from the current iteration of conditions. + + Parameters + ---------- + nonsig : bool + Indicate non-significance. + conds_dim : int + Cardinality of the current step. + max_combinations : int + Maximum number of combinations of conditions of current cardinality + to test. + """ + # Start with an indent + print_str = " " + # Determine the body of the text + if nonsig: + print_str += "Non-significance detected." + elif conds_dim > max_combinations: + print_str += "Still subsets of dimension" + \ + " %d left," % (conds_dim) + \ + " but q_max = %d reached." % (max_combinations) + else: + print_str += "No conditions of dimension %d left." % (conds_dim) + # Print the message + print(print_str) + + def _print_converged_pc_single(self, converged, j, max_conds_dim): + """ + Print statement about the convergence of the pc_stable_single algorithm. + + Parameters + ---------- + convergence : bool + true if convergence was reached. + j : int + Variable index. + max_conds_dim : int + Maximum number of conditions to test. + """ + if converged: + print("\nAlgorithm converged for variable %s" % + self.var_names[j]) + else: + print( + "\nAlgorithm not yet converged, but max_conds_dim = %d" + " reached." % max_conds_dim) + + def _run_pc_stable_single(self, j, + selected_links=None, + tau_min=1, + tau_max=1, + save_iterations=False, + pc_alpha=0.2, + max_conds_dim=None, + max_combinations=1): + """Lagged PC algorithm for estimating lagged parents of single variable. + + Parameters + ---------- + j : int + Variable index. + selected_links : list, optional (default: None) + List of form [(0, -1), (3, -2), ...] + specifying whether only selected links should be tested. If None is + passed, all links are tested. + tau_min : int, optional (default: 1) + Minimum time lag to test. Useful for variable selection in + multi-step ahead predictions. Must be greater zero. + tau_max : int, optional (default: 1) + Maximum time lag. Must be larger or equal to tau_min. + save_iterations : bool, optional (default: False) + Whether to save iteration step results such as conditions used. + pc_alpha : float or None, optional (default: 0.2) + Significance level in algorithm. 
If a list is given, pc_alpha is + optimized using model selection criteria provided in the + cond_ind_test class as get_model_selection_criterion(). If None, + a default list of values is used. + max_conds_dim : int, optional (default: None) + Maximum number of conditions to test. If None is passed, this number + is unrestricted. + max_combinations : int, optional (default: 1) + Maximum number of combinations of conditions of current cardinality + to test. Defaults to 1 for PC_1 algorithm. For original PC algorithm + a larger number, such as 10, can be used. + + Returns + ------- + parents : list + List of estimated parents. + val_min : dict + Dictionary of form {(0, -1):float, ...} containing the minimum test + statistic value of a link. + pval_max : dict + Dictionary of form {(0, -1):float, ...} containing the maximum + p-value of a link across different conditions. + iterations : dict + Dictionary containing further information on algorithm steps. + """ + # Initialize the dictionaries for the pval_max, val_min parents_values + # results + pval_max = dict() + val_min = dict() + parents_values = dict() + # Initialize the parents values from the selected links, copying to + # ensure this initial argument is unchanged. + parents = deepcopy(selected_links) + val_min = {(p[0], p[1]): None for p in parents} + pval_max = {(p[0], p[1]): None for p in parents} + + # Define a nested defaultdict of depth 4 to save all information about + # iterations + iterations = _create_nested_dictionary(4) + # Ensure tau_min is at least 1 + tau_min = max(1, tau_min) + + # Loop over all possible condition dimensions + max_conds_dim = self._set_max_condition_dim(max_conds_dim, + tau_min, tau_max) + # Iteration through increasing number of conditions, i.e. from + # [0, max_conds_dim] inclusive + converged = False + for conds_dim in range(max_conds_dim + 1): + # (Re)initialize the list of non-significant links + nonsig_parents = list() + # Check if the algorithm has converged + if len(parents) - 1 < conds_dim: + converged = True + break + # Print information about + if self.verbosity > 1: + print("\nTesting condition sets of dimension %d:" % conds_dim) + + # Iterate through all possible pairs (that have not converged yet) + for index_parent, parent in enumerate(parents): + # Print info about this link + if self.verbosity > 1: + self._print_link_info(j, index_parent, parent, len(parents)) + # Iterate through all possible combinations + nonsig = False + for comb_index, Z in \ + enumerate(self._iter_conditions(parent, conds_dim, + parents)): + # Break if we try too many combinations + if comb_index >= max_combinations: + break + # Perform independence test + val, pval = self.cond_ind_test.run_test(X=[parent], + Y=[(j, 0)], + Z=Z, + tau_max=tau_max, + # verbosity=self.verbosity + ) + # Print some information if needed + if self.verbosity > 1: + self._print_cond_info(Z, comb_index, pval, val) + # Keep track of maximum p-value and minimum estimated value + # for each pair (across any condition) + parents_values[parent] = \ + min(np.abs(val), parents_values.get(parent, + float("inf"))) + + if pval_max[parent] is None or pval > pval_max[parent]: + pval_max[parent] = pval + val_min[parent] = val + + # Save the iteration if we need to + if save_iterations: + a_iter = iterations['iterations'][conds_dim][parent] + a_iter[comb_index]['conds'] = deepcopy(Z) + a_iter[comb_index]['val'] = val + a_iter[comb_index]['pval'] = pval + # Delete link later and break while-loop if non-significant + if pval > pc_alpha: + 
nonsig_parents.append((j, parent)) + nonsig = True + break + + # Print the results if needed + if self.verbosity > 1: + self._print_a_pc_result(nonsig, + conds_dim, max_combinations) + + # Remove non-significant links + for _, parent in nonsig_parents: + del parents_values[parent] + # Return the parents list sorted by the test metric so that the + # updated parents list is given to the next cond_dim loop + parents = self._sort_parents(parents_values) + # Print information about the change in possible parents + if self.verbosity > 1: + print("\nUpdating parents:") + self._print_parents_single(j, parents, parents_values, pval_max) + + # Print information about if convergence was reached + if self.verbosity > 1: + self._print_converged_pc_single(converged, j, max_conds_dim) + # Return the results + return {'parents': parents, + 'val_min': val_min, + 'pval_max': pval_max, + 'iterations': _nested_to_normal(iterations)} + + def _print_pc_params(self, selected_links, tau_min, tau_max, pc_alpha, + max_conds_dim, max_combinations): + """Print the setup of the current pc_stable run. + + Parameters + ---------- + selected_links : dict or None + Dictionary of form specifying which links should be tested. + tau_min : int, default: 1 + Minimum time lag to test. + tau_max : int, default: 1 + Maximum time lag to test. + pc_alpha : float or list of floats + Significance level in algorithm. + max_conds_dim : int + Maximum number of conditions to test. + max_combinations : int + Maximum number of combinations of conditions to test. + """ + print("\n##\n## Step 1: PC1 algorithm with lagged conditions\n##" + "\n\nParameters:") + if selected_links is not None: + print("selected_links = %s" % selected_links) + print("independence test = %s" % self.cond_ind_test.measure + + "\ntau_min = %d" % tau_min + + "\ntau_max = %d" % tau_max + + "\npc_alpha = %s" % pc_alpha + + "\nmax_conds_dim = %s" % max_conds_dim + + "\nmax_combinations = %d" % max_combinations) + print("\n") + + def _print_pc_sel_results(self, pc_alpha, results, j, score, optimal_alpha): + """Print the results from the pc_alpha selection. + + Parameters + ---------- + pc_alpha : list + Tested significance levels in algorithm. + results : dict + Results from the tested pc_alphas. + score : array of floats + scores from each pc_alpha. + j : int + Index of current variable. + optimal_alpha : float + Optimal value of pc_alpha. + """ + print("\n# Condition selection results:") + for iscore, pc_alpha_here in enumerate(pc_alpha): + names_parents = "[ " + for pari in results[pc_alpha_here]['parents']: + names_parents += "(%s % d) " % ( + self.var_names[pari[0]], pari[1]) + names_parents += "]" + print(" pc_alpha=%s got score %.4f with parents %s" % + (pc_alpha_here, score[iscore], names_parents)) + print("\n--> optimal pc_alpha for variable %s is %s" % + (self.var_names[j], optimal_alpha)) + + def _check_tau_limits(self, tau_min, tau_max): + """Check the tau limits adhere to 0 <= tau_min <= tau_max. + + Parameters + ---------- + tau_min : float + Minimum tau value. + tau_max : float + Maximum tau value. + """ + if not 0 <= tau_min <= tau_max: + raise ValueError("tau_max = %d, " % (tau_max) + \ + "tau_min = %d, " % (tau_min) + \ + "but 0 <= tau_min <= tau_max") + + def _set_max_condition_dim(self, max_conds_dim, tau_min, tau_max): + """ + Set the maximum dimension of the conditions. Defaults to self.N*tau_max. + + Parameters + ---------- + max_conds_dim : int + Input maximum condition dimension. + tau_max : int + Maximum tau. 
+ + Returns + ------- + max_conds_dim : int + Input maximum condition dimension or default. + """ + # Check if an input was given + if max_conds_dim is None: + max_conds_dim = self.N * (tau_max - tau_min + 1) + # Check this is a valid + if max_conds_dim < 0: + raise ValueError("maximum condition dimension must be >= 0") + return max_conds_dim + +
[docs] def run_pc_stable(self, + selected_links=None, + tau_min=1, + tau_max=1, + save_iterations=False, + pc_alpha=0.2, + max_conds_dim=None, + max_combinations=1): + """Lagged PC algorithm for estimating lagged parents of all variables. + + Parents are made available as self.all_parents + + Parameters + ---------- + selected_links : dict or None + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} + specifying whether only selected links should be tested. If None is + passed, all links are tested. + tau_min : int, default: 1 + Minimum time lag to test. Useful for multi-step ahead predictions. + Must be greater zero. + tau_max : int, default: 1 + Maximum time lag. Must be larger or equal to tau_min. + save_iterations : bool, default: False + Whether to save iteration step results such as conditions used. + pc_alpha : float or list of floats, default: [0.05, 0.1, 0.2, ..., 0.5] + Significance level in algorithm. If a list or None is passed, the + pc_alpha level is optimized for every variable across the given + pc_alpha values using the score computed in + cond_ind_test.get_model_selection_criterion(). + max_conds_dim : int or None + Maximum number of conditions to test. If None is passed, this number + is unrestricted. + max_combinations : int, default: 1 + Maximum number of combinations of conditions of current cardinality + to test. Defaults to 1 for PC_1 algorithm. For original PC algorithm + a larger number, such as 10, can be used. + + Returns + ------- + all_parents : dict + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} + containing estimated parents. + """ + # Create an internal copy of pc_alpha + _int_pc_alpha = deepcopy(pc_alpha) + # Check if we are selecting an optimal alpha value + select_optimal_alpha = True + # Set the default values for pc_alpha + if _int_pc_alpha is None: + _int_pc_alpha = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5] + elif not isinstance(_int_pc_alpha, (list, tuple, np.ndarray)): + _int_pc_alpha = [_int_pc_alpha] + select_optimal_alpha = False + # Check the limits on tau_min + self._check_tau_limits(tau_min, tau_max) + tau_min = max(1, tau_min) + # Check that the maximum combinations variable is correct + if max_combinations <= 0: + raise ValueError("max_combinations must be > 0") + # Implement defaultdict for all pval_max, val_max, and iterations + pval_max = defaultdict(dict) + val_min = defaultdict(dict) + iterations = defaultdict(dict) + + if self.verbosity > 0: + self._print_pc_params(selected_links, tau_min, tau_max, + _int_pc_alpha, max_conds_dim, + max_combinations) + + # Set the selected links + _int_sel_links = self._set_sel_links(selected_links, tau_min, tau_max, + remove_contemp=True) + + # Initialize all parents + all_parents = dict() + # Set the maximum condition dimension + max_conds_dim = self._set_max_condition_dim(max_conds_dim, + tau_min, tau_max) + + # Loop through the selected variables + for j in range(self.N): + # Print the status of this variable + if self.verbosity > 1: + print("\n## Variable %s" % self.var_names[j]) + print("\nIterating through pc_alpha = %s:" % _int_pc_alpha) + # Initialize the scores for selecting the optimal alpha + score = np.zeros_like(_int_pc_alpha) + # Initialize the result + results = {} + for iscore, pc_alpha_here in enumerate(_int_pc_alpha): + # Print statement about the pc_alpha being tested + if self.verbosity > 1: + print("\n# pc_alpha = %s (%d/%d):" % (pc_alpha_here, + iscore + 1, + score.shape[0])) + # Get the results for this alpha value + results[pc_alpha_here] = \ + 
self._run_pc_stable_single(j, + selected_links=_int_sel_links[j], + tau_min=tau_min, + tau_max=tau_max, + save_iterations=save_iterations, + pc_alpha=pc_alpha_here, + max_conds_dim=max_conds_dim, + max_combinations=max_combinations) + # Figure out the best score if there is more than one pc_alpha + # value + if select_optimal_alpha: + score[iscore] = \ + self.cond_ind_test.get_model_selection_criterion( + j, results[pc_alpha_here]['parents'], tau_max) + # Record the optimal alpha value + optimal_alpha = _int_pc_alpha[score.argmin()] + # Only print the selection results if there is more than one + # pc_alpha + if self.verbosity > 1 and select_optimal_alpha: + self._print_pc_sel_results(_int_pc_alpha, results, j, + score, optimal_alpha) + # Record the results for this variable + all_parents[j] = results[optimal_alpha]['parents'] + val_min[j] = results[optimal_alpha]['val_min'] + pval_max[j] = results[optimal_alpha]['pval_max'] + iterations[j] = results[optimal_alpha]['iterations'] + # Only save the optimal alpha if there is more than one pc_alpha + if select_optimal_alpha: + iterations[j]['optimal_pc_alpha'] = optimal_alpha + # Save the results in the current status of the algorithm + self.all_parents = all_parents + self.val_matrix = self._dict_to_matrix(val_min, tau_max, self.N, + default=0.) + self.p_matrix = self._dict_to_matrix(pval_max, tau_max, self.N, + default=1.) + self.iterations = iterations + self.val_min = val_min + self.pval_max = pval_max + # Print the results + if self.verbosity > 0: + print("\n## Resulting lagged parent (super)sets:") + self._print_parents(all_parents, val_min, pval_max) + # Return the parents + return all_parents
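A minimal, self-contained sketch of calling run_pc_stable() on toy data; ParCorr and the toy-model helper are used as elsewhere in the package, and the link strengths and sample size are illustrative:

import tigramite.data_processing as pp
from tigramite.pcmci import PCMCI
from tigramite.independence_tests import ParCorr
from tigramite.toymodels import structural_causal_processes as toys

def lin_f(x): return x

links = {0: [((0, -1), 0.7, lin_f)],
         1: [((1, -1), 0.7, lin_f), ((0, -1), 0.5, lin_f)]}
data, _ = toys.structural_causal_process(links, T=500)

pcmci = PCMCI(dataframe=pp.DataFrame(data), cond_ind_test=ParCorr())
all_parents = pcmci.run_pc_stable(tau_max=2, pc_alpha=0.2)
print(all_parents)  # e.g. {0: [(0, -1)], 1: [(1, -1), (0, -1)]}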
+ + def _print_parents_single(self, j, parents, val_min, pval_max): + """Print current parents for variable j. + + Parameters + ---------- + j : int + Index of current variable. + parents : list + List of form [(0, -1), (3, -2), ...]. + val_min : dict + Dictionary of form {(0, -1):float, ...} containing the minimum test + statistic value of a link. + pval_max : dict + Dictionary of form {(0, -1):float, ...} containing the maximum + p-value of a link across different conditions. + """ + if len(parents) < 20 or hasattr(self, 'iterations'): + print("\n Variable %s has %d link(s):" % ( + self.var_names[j], len(parents))) + if (hasattr(self, 'iterations') + and 'optimal_pc_alpha' in list(self.iterations[j])): + print(" [pc_alpha = %s]" % ( + self.iterations[j]['optimal_pc_alpha'])) + if val_min is None or pval_max is None: + for p in parents: + print(" (%s % .d)" % ( + self.var_names[p[0]], p[1])) + else: + for p in parents: + print(" (%s % .d): max_pval = %.5f, min_val = % .3f" % ( + self.var_names[p[0]], p[1], pval_max[p], + val_min[p])) + else: + print("\n Variable %s has %d link(s):" % ( + self.var_names[j], len(parents))) + + def _print_parents(self, all_parents, val_min, pval_max): + """Print current parents. + + Parameters + ---------- + all_parents : dictionary + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing + the conditioning-parents estimated with PC algorithm. + val_min : dict + Dictionary of form {0:{(0, -1):float, ...}} containing the minimum + test statistic value of a link. + pval_max : dict + Dictionary of form {0:{(0, -1):float, ...}} containing the maximum + p-value of a link across different conditions. + """ + for j in [var for var in list(all_parents)]: + if val_min is None or pval_max is None: + self._print_parents_single(j, all_parents[j], + None, None) + else: + self._print_parents_single(j, all_parents[j], + val_min[j], pval_max[j]) + + def _mci_condition_to_string(self, conds): + """Convert the list of conditions into a string. + + Parameters + ---------- + conds : list + List of conditions. + """ + cond_string = "[ " + for k, tau_k in conds: + cond_string += "(%s % d) " % (self.var_names[k], tau_k) + cond_string += "]" + return cond_string + + def _print_mci_conditions(self, conds_y, conds_x_lagged, + j, i, tau, count, n_parents): + """Print information about the conditions for the MCI algorithm. + + Parameters + ---------- + conds_y : list + Conditions on node. + conds_x_lagged : list + Conditions on parent. + j : int + Current node. + i : int + Parent node. + tau : int + Parent time delay. + count : int + Index of current parent. + n_parents : int + Total number of parents. + """ + # Remove the current parent from the conditions + conds_y_no_i = [node for node in conds_y if node != (i, tau)] + # Get the condition string for parent + condy_str = self._mci_condition_to_string(conds_y_no_i) + # Get the condition string for node + condx_str = self._mci_condition_to_string(conds_x_lagged) + # Formate and print the information + indent = "\n " + print_str = indent + "link (%s % d) " % (self.var_names[i], tau) + print_str += "--> %s (%d/%d):" % ( + self.var_names[j], count + 1, n_parents) + print_str += indent + "with conds_y = %s" % (condy_str) + print_str += indent + "with conds_x = %s" % (condx_str) + print(print_str) + + def _print_pcmciplus_conditions(self, lagged_parents, i, j, abstau, + max_conds_py, max_conds_px, + max_conds_px_lagged): + """Print information about the conditions for PCMCIplus. 
+ + Parameters + ---------- + lagged_parents : dictionary of lists + Dictionary of lagged parents for each node. + j : int + Current node. + i : int + Parent node. + abstau : int + Parent time delay. + max_conds_py : int + Max number of parents for node j. + max_conds_px : int + Max number of parents for lagged node i. + max_conds_px_lagged : int + Maximum number of lagged conditions of X when X is lagged in MCI + tests. If None is passed, this number is equal to max_conds_px. + """ + conds_y = lagged_parents[j][:max_conds_py] + conds_y_no_i = [node for node in conds_y if node != (i, -abstau)] + if abstau == 0: + conds_x = lagged_parents[i][:max_conds_px] + else: + if max_conds_px_lagged is None: + conds_x = lagged_parents[i][:max_conds_px] + else: + conds_x = lagged_parents[i][:max_conds_px_lagged] + + # Shift the conditions for X by tau + conds_x_lagged = [(k, -abstau + k_tau) for k, k_tau in conds_x] + condy_str = self._mci_condition_to_string(conds_y_no_i) + condx_str = self._mci_condition_to_string(conds_x_lagged) + print_str = " with conds_y = %s" % (condy_str) + print_str += "\n with conds_x = %s" % (condx_str) + print(print_str) + + def _get_int_parents(self, parents): + """Get the input parents dictionary. + + Parameters + ---------- + parents : dict or None + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} + specifying the conditions for each variable. If None is + passed, no conditions are used. + + Returns + ------- + int_parents : defaultdict of lists + Internal copy of parents, respecting default options + """ + int_parents = deepcopy(parents) + if int_parents is None: + int_parents = defaultdict(list) + else: + int_parents = defaultdict(list, int_parents) + return int_parents + + def _iter_indep_conds(self, + parents, + selected_links, + max_conds_py, + max_conds_px): + """Iterate through the conditions dictated by the arguments, yielding + the needed arguments for conditional independence functions. + + Parameters + ---------- + parents : dict + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} + specifying the conditions for each variable. + selected_links : dict + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} + specifying whether only selected links should be tested. + max_conds_py : int + Maximum number of conditions of Y to use. + max_conds_px : int + Maximum number of conditions of Z to use. 
+ + Yields + ------ + i, j, tau, Z : list of tuples + (i, tau) is the parent node, (j, 0) is the current node, and Z is of + the form [(var, tau + tau')] and specifies the condition to test + """ + # Loop over the selected variables + for j in range(self.N): + # Get the conditions for node j + conds_y = parents[j][:max_conds_py] + # Create a parent list from links seperated in time and by node + parent_list = [(i, tau) for i, tau in selected_links[j] + if (i, tau) != (j, 0)] + # Iterate through parents (except those in conditions) + for cnt, (i, tau) in enumerate(parent_list): + # Get the conditions for node i + conds_x = parents[i][:max_conds_px] + # Shift the conditions for X by tau + conds_x_lagged = [(k, tau + k_tau) for k, k_tau in conds_x] + # Print information about the mci conditions if requested + if self.verbosity > 1: + self._print_mci_conditions(conds_y, conds_x_lagged, j, i, + tau, cnt, len(parent_list)) + # Construct lists of tuples for estimating + # I(X_t-tau; Y_t | Z^Y_t, Z^X_t-tau) + # with conditions for X shifted by tau + Z = [node for node in conds_y if node != (i, tau)] + # Remove overlapped nodes between conds_x_lagged and conds_y + Z += [node for node in conds_x_lagged if node not in Z] + # Yield these list + yield j, i, tau, Z + + def _run_mci_or_variants(self, + selected_links=None, + tau_min=0, + tau_max=1, + parents=None, + max_conds_py=None, + max_conds_px=None, + val_only=False, + alpha_level=0.05, + fdr_method='none'): + """Base function for MCI method and variants. + + Returns the matrices of test statistic values, (optionally corrected) + p-values, and (optionally) confidence intervals. Also (new in 4.3) + returns graph based on alpha_level (and optional FDR-correction). + + Parameters + ---------- + selected_links : dict or None + Dictionary of form {0: [(3, -2), ...], 1:[], ...} + specifying whether only selected links should be tested. If None is + passed, all links are tested. + tau_min : int, default: 0 + Minimum time lag to test. Note that zero-lags are undirected. + tau_max : int, default: 1 + Maximum time lag. Must be larger or equal to tau_min. + parents : dict or None + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} + specifying the conditions for each variable. If None is + passed, no conditions are used. + max_conds_py : int or None + Maximum number of conditions of Y to use. If None is passed, this + number is unrestricted. + max_conds_px : int or None + Maximum number of conditions of Z to use. If None is passed, this + number is unrestricted. + val_only : bool, default: False + Option to only compute dependencies and not p-values. + alpha_level : float, optional (default: 0.05) + Significance level at which the p_matrix is thresholded to + get graph. + fdr_method : str, optional (default: 'fdr_bh') + Correction method, currently implemented is Benjamini-Hochberg + False Discovery Rate method. + + Returns + ------- + graph : array of shape [N, N, tau_max+1] + Causal graph, see description above for interpretation. + val_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of test statistic values. + p_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of p-values, optionally adjusted if fdr_method is + not 'none'. + conf_matrix : array of shape [N, N, tau_max+1,2] + Estimated matrix of confidence intervals of test statistic values. + Only computed if set in cond_ind_test, where also the percentiles + are set. 
+ """ + # Check the limits on tau + self._check_tau_limits(tau_min, tau_max) + # Set the selected links + _int_sel_links = self._set_sel_links(selected_links, tau_min, tau_max) + + # Set the maximum condition dimension for Y and X + max_conds_py = self._set_max_condition_dim(max_conds_py, + tau_min, tau_max) + max_conds_px = self._set_max_condition_dim(max_conds_px, + tau_min, tau_max) + # Get the parents that will be checked + _int_parents = self._get_int_parents(parents) + # Initialize the return values + val_matrix = np.zeros((self.N, self.N, tau_max + 1)) + p_matrix = np.ones((self.N, self.N, tau_max + 1)) + # Initialize the optional return of the confidance matrix + conf_matrix = None + if self.cond_ind_test.confidence is not None: + conf_matrix = np.zeros((self.N, self.N, tau_max + 1, 2)) + + # Get the conditions as implied by the input arguments + for j, i, tau, Z in self._iter_indep_conds(_int_parents, + _int_sel_links, + max_conds_py, + max_conds_px): + # Set X and Y (for clarity of code) + X = [(i, tau)] + Y = [(j, 0)] + + if val_only is False: + # Run the independence tests and record the results + val, pval = self.cond_ind_test.run_test(X, Y, Z=Z, + tau_max=tau_max, + # verbosity= + # self.verbosity + ) + val_matrix[i, j, abs(tau)] = val + p_matrix[i, j, abs(tau)] = pval + else: + val = self.cond_ind_test.get_measure(X, Y, Z=Z, tau_max=tau_max) + val_matrix[i, j, abs(tau)] = val + + # Get the confidence value, returns None if cond_ind_test.confidence + # is False + conf = self.cond_ind_test.get_confidence(X, Y, Z=Z, tau_max=tau_max) + # Record the value if the conditional independence requires it + if self.cond_ind_test.confidence: + conf_matrix[i, j, abs(tau)] = conf + + if val_only: + results = {'val_matrix':val_matrix, + 'conf_matrix':conf_matrix} + self.results = results + return results + + # Correct the p_matrix if there is a fdr_method + if fdr_method != 'none': + p_matrix = self.get_corrected_pvalues(p_matrix=p_matrix, tau_min=tau_min, + tau_max=tau_max, + selected_links=_int_sel_links, + fdr_method=fdr_method) + + # Threshold p_matrix to get graph + final_graph = p_matrix <= alpha_level + + # Convert to string graph representation + graph = self.convert_to_string_graph(final_graph) + + # Symmetrize p_matrix and val_matrix + symmetrized_results = self.symmetrize_p_and_val_matrix( + p_matrix=p_matrix, + val_matrix=val_matrix, + selected_links=_int_sel_links, + conf_matrix=conf_matrix) + + if self.verbosity > 0: + self.print_significant_links( + graph = graph, + p_matrix = symmetrized_results['p_matrix'], + val_matrix = symmetrized_results['val_matrix'], + conf_matrix = symmetrized_results['conf_matrix'], + alpha_level = alpha_level) + + # Return the values as a dictionary and store in class + results = { + 'graph': graph, + 'p_matrix': symmetrized_results['p_matrix'], + 'val_matrix': symmetrized_results['val_matrix'], + 'conf_matrix': symmetrized_results['conf_matrix'], + } + self.results = results + return results + +
[docs] def run_mci(self, + selected_links=None, + tau_min=0, + tau_max=1, + parents=None, + max_conds_py=None, + max_conds_px=None, + val_only=False, + alpha_level=0.05, + fdr_method='none'): + """MCI conditional independence tests. + + Implements the MCI test (Algorithm 2 in [1]_). + + Returns the matrices of test statistic values, (optionally corrected) + p-values, and (optionally) confidence intervals. Also (new in 4.3) + returns graph based on alpha_level (and optional FDR-correction). + + Parameters + ---------- + selected_links : dict or None + Dictionary of form {0: [(3, -2), ...], 1:[], ...} + specifying whether only selected links should be tested. If None is + passed, all links are tested. + tau_min : int, default: 0 + Minimum time lag to test. Note that zero-lags are undirected. + tau_max : int, default: 1 + Maximum time lag. Must be larger or equal to tau_min. + parents : dict or None + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} + specifying the conditions for each variable. If None is + passed, no conditions are used. + max_conds_py : int or None + Maximum number of conditions of Y to use. If None is passed, this + number is unrestricted. + max_conds_px : int or None + Maximum number of conditions of Z to use. If None is passed, this + number is unrestricted. + val_only : bool, default: False + Option to only compute dependencies and not p-values. + alpha_level : float, optional (default: 0.05) + Significance level at which the p_matrix is thresholded to + get graph. + fdr_method : str, optional (default: 'fdr_bh') + Correction method, currently implemented is Benjamini-Hochberg + False Discovery Rate method. + + Returns + ------- + graph : array of shape [N, N, tau_max+1] + Causal graph, see description above for interpretation. + val_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of test statistic values. + p_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of p-values, optionally adjusted if fdr_method is + not 'none'. + conf_matrix : array of shape [N, N, tau_max+1,2] + Estimated matrix of confidence intervals of test statistic values. + Only computed if set in cond_ind_test, where also the percentiles + are set. + """ + + if self.verbosity > 0: + print("\n##\n## Step 2: MCI algorithm\n##" + "\n\nParameters:") + print("\nindependence test = %s" % self.cond_ind_test.measure + + "\ntau_min = %d" % tau_min + + "\ntau_max = %d" % tau_max + + "\nmax_conds_py = %s" % max_conds_py + + "\nmax_conds_px = %s" % max_conds_px) + + return self._run_mci_or_variants( + selected_links=selected_links, + tau_min=tau_min, + tau_max=tau_max, + parents=parents, + max_conds_py=max_conds_py, + max_conds_px=max_conds_px, + val_only=val_only, + alpha_level=alpha_level, + fdr_method=fdr_method)
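+ # Illustrative sketch of calling the MCI step directly, mirroring what
+ # run_pcmci does internally: the parent superset from run_pc_stable is passed
+ # as conditions to run_mci (reusing pcmci and all_parents from the
+ # run_pc_stable sketch above; with pure noise data no links are expected).
+ results_mci = pcmci.run_mci(tau_max=2, parents=all_parents, alpha_level=0.05)
+ print(results_mci['graph'].shape)   # (N, N, tau_max + 1)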
+ +
[docs] def get_lagged_dependencies(self, + selected_links=None, + tau_min=0, + tau_max=1, + val_only=False, + alpha_level=0.05, + fdr_method='none'): + """Unconditional lagged independence tests. + + Implements the unconditional lagged independence test (see [ 1]_). + + Returns the matrices of test statistic values, (optionally corrected) + p-values, and (optionally) confidence intervals. Also (new in 4.3) + returns graph based on alpha_level (and optional FDR-correction). + + Parameters + ---------- + selected_links : dict or None + Dictionary of form {0: [(3, -2), ...], 1:[], ...} + specifying whether only selected links should be tested. If None is + passed, all links are tested. + tau_min : int, default: 0 + Minimum time lag to test. Note that zero-lags are undirected. + tau_max : int, default: 1 + Maximum time lag. Must be larger or equal to tau_min. + val_only : bool, default: False + Option to only compute dependencies and not p-values. + alpha_level : float, optional (default: 0.05) + Significance level at which the p_matrix is thresholded to + get graph. + fdr_method : str, optional (default: 'fdr_bh') + Correction method, currently implemented is Benjamini-Hochberg + False Discovery Rate method. + + Returns + ------- + graph : array of shape [N, N, tau_max+1] + Causal graph, see description above for interpretation. + val_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of test statistic values. + p_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of p-values, optionally adjusted if fdr_method is + not 'none'. + conf_matrix : array of shape [N, N, tau_max+1,2] + Estimated matrix of confidence intervals of test statistic values. + Only computed if set in cond_ind_test, where also the percentiles + are set. + """ + + if self.verbosity > 0: + print("\n##\n## Estimating lagged dependencies \n##" + "\n\nParameters:") + print("\nindependence test = %s" % self.cond_ind_test.measure + + "\ntau_min = %d" % tau_min + + "\ntau_max = %d" % tau_max) + + return self._run_mci_or_variants( + selected_links=selected_links, + tau_min=tau_min, + tau_max=tau_max, + parents=None, + max_conds_py=0, + max_conds_px=0, + val_only=val_only, + alpha_level=alpha_level, + fdr_method=fdr_method)
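+ # Illustrative sketch: inspect unconditional lagged dependencies, e.g. to
+ # guide the choice of tau_max as recommended in the run_pcmci notes below
+ # (reusing the pcmci instance from the run_pc_stable sketch above).
+ lagged = pcmci.get_lagged_dependencies(tau_max=10, val_only=True)
+ # Largest absolute dependence per lag, maximized over all variable pairs
+ print(np.abs(lagged['val_matrix']).max(axis=(0, 1)))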
+ +
[docs] def run_fullci(self, + selected_links=None, + tau_min=0, + tau_max=1, + val_only=False, + alpha_level=0.05, + fdr_method='none'): + """FullCI conditional independence tests. + + Implements the FullCI test (see [1]_). + + Returns the matrices of test statistic values, (optionally corrected) + p-values, and (optionally) confidence intervals. Also (new in 4.3) + returns graph based on alpha_level (and optional FDR-correction). + + Parameters + ---------- + selected_links : dict or None + Dictionary of form {0: [(3, -2), ...], 1:[], ...} + specifying whether only selected links should be tested. If None is + passed, all links are tested. + tau_min : int, default: 0 + Minimum time lag to test. Note that zero-lags are undirected. + tau_max : int, default: 1 + Maximum time lag. Must be larger or equal to tau_min. + val_only : bool, default: False + Option to only compute dependencies and not p-values. + alpha_level : float, optional (default: 0.05) + Significance level at which the p_matrix is thresholded to + get graph. + fdr_method : str, optional (default: 'fdr_bh') + Correction method, currently implemented is Benjamini-Hochberg + False Discovery Rate method. + + Returns + ------- + graph : array of shape [N, N, tau_max+1] + Causal graph, see description above for interpretation. + val_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of test statistic values. + p_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of p-values, optionally adjusted if fdr_method is + not 'none'. + conf_matrix : array of shape [N, N, tau_max+1,2] + Estimated matrix of confidence intervals of test statistic values. + Only computed if set in cond_ind_test, where also the percentiles + are set. + """ + + if self.verbosity > 0: + print("\n##\n## Running Tigramite FullCI algorithm\n##" + "\n\nParameters:") + print("\nindependence test = %s" % self.cond_ind_test.measure + + "\ntau_min = %d" % tau_min + + "\ntau_max = %d" % tau_max) + + full_past = dict([(j, [(i, -tau) + for i in range(self.N) + for tau in range(max(1, tau_min), tau_max + 1)]) + for j in range(self.N)]) + + return self._run_mci_or_variants( + selected_links=selected_links, + tau_min=tau_min, + tau_max=tau_max, + parents=full_past, + max_conds_py=None, + max_conds_px=0, + val_only=val_only, + alpha_level=alpha_level, + fdr_method=fdr_method)
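+ # Illustrative sketch: FullCI conditions every link test on the entire past
+ # of all variables up to tau_max, as constructed in full_past above (reusing
+ # the pcmci instance from the run_pc_stable sketch).
+ results_fullci = pcmci.run_fullci(tau_max=2, alpha_level=0.05)
+ print(results_fullci['p_matrix'].shape)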
+ +
[docs] def run_bivci(self, + selected_links=None, + tau_min=0, + tau_max=1, + val_only=False, + alpha_level=0.05, + fdr_method='none'): + """BivCI conditional independence tests. + + Implements the BivCI test (see [1]_). + + Returns the matrices of test statistic values, (optionally corrected) + p-values, and (optionally) confidence intervals. Also (new in 4.3) + returns graph based on alpha_level (and optional FDR-correction). + + Parameters + ---------- + selected_links : dict or None + Dictionary of form {0: [(3, -2), ...], 1:[], ...} + specifying whether only selected links should be tested. If None is + passed, all links are tested. + tau_min : int, default: 0 + Minimum time lag to test. Note that zero-lags are undirected. + tau_max : int, default: 1 + Maximum time lag. Must be larger or equal to tau_min. + val_only : bool, default: False + Option to only compute dependencies and not p-values. + alpha_level : float, optional (default: 0.05) + Significance level at which the p_matrix is thresholded to + get graph. + fdr_method : str, optional (default: 'fdr_bh') + Correction method, currently implemented is Benjamini-Hochberg + False Discovery Rate method. + + Returns + ------- + graph : array of shape [N, N, tau_max+1] + Causal graph, see description above for interpretation. + val_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of test statistic values. + p_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of p-values, optionally adjusted if fdr_method is + not 'none'. + conf_matrix : array of shape [N, N, tau_max+1,2] + Estimated matrix of confidence intervals of test statistic values. + Only computed if set in cond_ind_test, where also the percentiles + are set. + """ + + if self.verbosity > 0: + print("\n##\n## Running Tigramite BivCI algorithm\n##" + "\n\nParameters:") + print("\nindependence test = %s" % self.cond_ind_test.measure + + "\ntau_min = %d" % tau_min + + "\ntau_max = %d" % tau_max) + + auto_past = dict([(j, [(j, -tau) + for tau in range(max(1, tau_min), tau_max + 1)]) + for j in range(self.N)]) + + return self._run_mci_or_variants( + selected_links=selected_links, + tau_min=tau_min, + tau_max=tau_max, + parents=auto_past, + max_conds_py=None, + max_conds_px=0, + val_only=val_only, + alpha_level=alpha_level, + fdr_method=fdr_method)
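+ # Illustrative sketch: BivCI conditions each link test only on the
+ # autocorrelation past of the target variable, as constructed in auto_past
+ # above (reusing the pcmci instance from the run_pc_stable sketch).
+ results_bivci = pcmci.run_bivci(tau_max=2, alpha_level=0.05)
+ print(results_bivci['p_matrix'].shape)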
+ +
[docs] def get_corrected_pvalues(self, p_matrix, + fdr_method='fdr_bh', + exclude_contemporaneous=True, + tau_min=0, + tau_max=1, + selected_links=None, + ): + """Returns p-values corrected for multiple testing. + + Currently implemented is Benjamini-Hochberg False Discovery Rate + method. Correction is performed either among all links if + exclude_contemporaneous==False, or only among lagged links. + + Parameters + ---------- + p_matrix : array-like + Matrix of p-values. Must be of shape (N, N, tau_max + 1). + tau_min : int, default: 0 + Minimum time lag. Only used as consistency check of selected_links. + tau_max : int, default: 1 + Maximum time lag. Must be larger or equal to tau_min. Only used as + consistency check of selected_links. + selected_links : dict or None + Dictionary of form {0: [(3, -2), ...], 1:[], ...} + specifying whether only selected links should be tested. If None is + passed, all links are tested. + fdr_method : str, optional (default: 'fdr_bh') + Correction method, currently implemented is Benjamini-Hochberg + False Discovery Rate method. + exclude_contemporaneous : bool, optional (default: True) + Whether to include contemporaneous links in correction. + + Returns + ------- + q_matrix : array-like + Matrix of shape (N, N, tau_max + 1) containing corrected p-values. + """ + + def _ecdf(x): + """No frills empirical cdf used in fdr correction. + """ + nobs = len(x) + return np.arange(1, nobs + 1) / float(nobs) + + # Get the shape parameters from the p_matrix + _, N, tau_max_plusone = p_matrix.shape + # Check the limits on tau + self._check_tau_limits(tau_min, tau_max) + # Include only selected_links if given + if selected_links != None: + # Create a mask for these values + mask = np.zeros((N, N, tau_max_plusone), dtype='bool') + _int_sel_links = self._set_sel_links(selected_links, tau_min, tau_max) + for node1, links_ in _int_sel_links.items(): + for node2, lag in links_: + mask[node2, node1, abs(lag)] = True + else: + # Create a mask for these values + mask = np.ones((N, N, tau_max_plusone), dtype='bool') + # Ignore values from autocorrelation indices + mask[range(N), range(N), 0] = False + # Exclude all contemporaneous values if requested + if exclude_contemporaneous: + mask[:, :, 0] = False + # Create the return value + q_matrix = np.array(p_matrix) + # Use the multiple tests function + if fdr_method is None or fdr_method == 'none': + pass + elif fdr_method == 'fdr_bh': + pvs = p_matrix[mask] + pvals_sortind = np.argsort(pvs) + pvals_sorted = np.take(pvs, pvals_sortind) + + ecdffactor = _ecdf(pvals_sorted) + + pvals_corrected_raw = pvals_sorted / ecdffactor + pvals_corrected = np.minimum.accumulate( + pvals_corrected_raw[::-1])[::-1] + del pvals_corrected_raw + + pvals_corrected[pvals_corrected > 1] = 1 + pvals_corrected_ = np.empty_like(pvals_corrected) + pvals_corrected_[pvals_sortind] = pvals_corrected + del pvals_corrected + + q_matrix[mask] = pvals_corrected_ + + else: + raise ValueError('Only FDR method fdr_bh implemented') + + # Return the new matrix + return q_matrix
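+ # Illustrative sketch: apply the Benjamini-Hochberg correction to an existing
+ # p_matrix, here the uncorrected one from the run_mci sketch above; only
+ # lagged links are corrected since exclude_contemporaneous defaults to True.
+ q_matrix = pcmci.get_corrected_pvalues(p_matrix=results_mci['p_matrix'],
+                                        fdr_method='fdr_bh',
+                                        tau_min=0, tau_max=2)
+ print((q_matrix >= results_mci['p_matrix']).all())   # corrected p-values never decrease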
+ +
[docs] def get_graph_from_pmatrix(self, p_matrix, alpha_level,
+                               tau_min, tau_max, selected_links=None):
+        """Construct graph from thresholding the p_matrix at an alpha-level.
+
+        Allows taking selected_links into account.
+
+        Parameters
+        ----------
+        p_matrix : array of shape [N, N, tau_max+1]
+            Estimated matrix of p-values, optionally adjusted if fdr_method is
+            not 'none'.
+        alpha_level : float, optional (default: 0.05)
+            Significance level at which the p_matrix is thresholded to
+            get graph.
+        tau_min : int
+            Minimum time delay to test.
+        tau_max : int
+            Maximum time delay to test.
+        selected_links : dict or None
+            Dictionary of form {0: [(3, -2), ...], 1:[], ...}
+            specifying whether only selected links should be tested. If None is
+            passed, all links are tested.
+
+        Returns
+        -------
+        graph : array of shape [N, N, tau_max+1]
+            Causal graph, see description above for interpretation.
+        """
+
+        _int_sel_links = self._set_sel_links(selected_links, tau_min, tau_max)
+
+        if selected_links is not None:
+            # Create a mask for these values
+            mask = np.zeros((self.N, self.N, tau_max + 1), dtype='bool')
+            for node1, links_ in _int_sel_links.items():
+                for node2, lag in links_:
+                    mask[node2, node1, abs(lag)] = True
+        else:
+            # Create a mask for these values
+            mask = np.ones((self.N, self.N, tau_max + 1), dtype='bool')
+
+        # Set all p-values of absent links to 1.
+        p_matrix[mask == False] = 1.
+
+        # Threshold p_matrix to get graph
+        graph_bool = p_matrix <= alpha_level
+
+        # Convert to string graph representation
+        graph = self.convert_to_string_graph(graph_bool)
+
+        # Return the graph
+        return graph
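+ # Illustrative sketch: threshold the corrected p-values from the sketch above
+ # into a string graph ("-->" entries mark significant lagged links).
+ graph = pcmci.get_graph_from_pmatrix(p_matrix=q_matrix, alpha_level=0.05,
+                                      tau_min=0, tau_max=2)
+ print(graph[:, :, 1])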
+ +
[docs] def return_parents_dict(self, graph, + val_matrix, + include_lagzero_parents=False): + """Returns dictionary of parents sorted by val_matrix. + + If parents are unclear (link with o or x), then no parent + is returned. + + Parameters + ---------- + graph : array of shape [N, N, tau_max+1] + Causal graph, see description above for interpretation. + val_matrix : array-like + Matrix of test statistic values. Must be of shape (N, N, tau_max + + 1). + include_lagzero_parents : bool (default: False) + Whether the dictionary should also return parents at lag + zero. + + Returns + ------- + parents_dict : dict + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} + containing estimated parents. + """ + + # Initialize the return value + parents_dict = dict() + for j in range(self.N): + # Get the good links + if include_lagzero_parents: + good_links = np.argwhere(graph[:, j, :] == "-->") + # Build a dictionary from these links to their values + links = {(i, -tau): np.abs(val_matrix[i, j, abs(tau)]) + for i, tau in good_links} + else: + good_links = np.argwhere(graph[:, j, 1:] == "-->") + # Build a dictionary from these links to their values + links = {(i, -tau - 1): np.abs(val_matrix[i, j, abs(tau) + 1]) + for i, tau in good_links} + # Sort by value + parents_dict[j] = sorted(links, key=links.get, reverse=True) + + return parents_dict
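+ # Illustrative sketch: condense a string graph and val_matrix into a parents
+ # dictionary sorted by absolute test statistic value (reusing graph from the
+ # get_graph_from_pmatrix sketch and val_matrix from the run_mci sketch).
+ parents_dict = pcmci.return_parents_dict(graph=graph,
+                                          val_matrix=results_mci['val_matrix'])
+ print(parents_dict)   # typically {0: [], 1: [], 2: []} for pure noise data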
+ + + + + + +
[docs] def print_results(self, + return_dict, + alpha_level=0.05): + """Prints significant parents from output of MCI or PCMCI algorithms. + + Parameters + ---------- + return_dict : dict + Dictionary of return values, containing keys + * 'p_matrix' + * 'val_matrix' + * 'conf_matrix' + + alpha_level : float, optional (default: 0.05) + Significance level. + """ + # Check if conf_matrix is defined + conf_matrix = None + conf_key = 'conf_matrix' + if conf_key in return_dict: + conf_matrix = return_dict[conf_key] + # Wrap the already defined function + if 'graph' in return_dict: + graph = return_dict['graph'] + else: + graph = None + if 'ambiguous_triples' in return_dict: + ambiguous_triples = return_dict['ambiguous_triples'] + else: + ambiguous_triples = None + self.print_significant_links(return_dict['p_matrix'], + return_dict['val_matrix'], + conf_matrix=conf_matrix, + graph=graph, + ambiguous_triples=ambiguous_triples, + alpha_level=alpha_level)
+ +
[docs] def run_pcmci(self, + selected_links=None, + tau_min=0, + tau_max=1, + save_iterations=False, + pc_alpha=0.05, + max_conds_dim=None, + max_combinations=1, + max_conds_py=None, + max_conds_px=None, + alpha_level=0.05, + fdr_method='none'): + """Runs PCMCI time-lagged causal discovery for time series. + + Wrapper around PC-algorithm function and MCI function. + + Notes + ----- + + The PCMCI causal discovery method is comprehensively described in [ + 1]_, where also analytical and numerical results are presented. Here + we briefly summarize the method. + + PCMCI estimates time-lagged causal links by a two-step procedure: + + 1. Condition-selection: For each variable :math:`j`, estimate a + *superset* of parents :math:`\\tilde{\mathcal{P}}(X^j_t)` with the + iterative PC1 algorithm, implemented as ``run_pc_stable``. The + condition-selection step reduces the dimensionality and avoids + conditioning on irrelevant variables. + + 2. *Momentary conditional independence* (MCI) + + .. math:: X^i_{t-\\tau} \perp X^j_{t} | \\tilde{\\mathcal{P}}( + X^j_t), \\tilde{\mathcal{P}}(X^i_{t-\\tau}) + + here implemented as ``run_mci``. This step estimates the p-values and + test statistic values for all links accounting for common drivers, + indirect links, and autocorrelation. + + NOTE: MCI test statistic values define a particular measure of causal + strength depending on the test statistic used. For example, ParCorr() + results in normalized values between -1 and 1. However, if you are + interested in quantifying causal effects, i.e., the effect of + hypothetical interventions, you may better look at the causal effect + estimation functionality of Tigramite. + + PCMCI can be flexibly combined with any kind of conditional + independence test statistic adapted to the kind of data (continuous + or discrete) and its assumed dependency types. These are available in + ``tigramite.independence_tests``. + + The main free parameters of PCMCI (in addition to free parameters of + the conditional independence test statistic) are the maximum time + delay :math:`\\tau_{\\max}` (``tau_max``) and the significance + threshold in the condition-selection step :math:`\\alpha` ( + ``pc_alpha``). The maximum time delay depends on the application and + should be chosen according to the maximum causal time lag expected in + the complex system. We recommend a rather large choice that includes + peaks in the ``get_lagged_dependencies`` function. :math:`\\alpha` + should not be seen as a significance test level in the + condition-selection step since the iterative hypothesis tests do not + allow for a precise assessment. :math:`\\alpha` rather takes the role + of a regularization parameter in model-selection techniques. If a + list of values is given or ``pc_alpha=None``, :math:`\\alpha` is + optimized using model selection criteria implemented in the respective + ``tigramite.independence_tests``. + + Further optional parameters are discussed in [1]_. + + Examples + -------- + >>> import numpy + >>> from tigramite.pcmci import PCMCI + >>> from tigramite.independence_tests import ParCorr + >>> import tigramite.data_processing as pp + >>> from tigramite.toymodels import structural_causal_processes as toys + >>> numpy.random.seed(7) + >>> # Example process to play around with + >>> # Each key refers to a variable and the incoming links are supplied + >>> # as a list of format [((driver, -lag), coeff), ...] 
+ >>> links_coeffs = {0: [((0, -1), 0.8)], + 1: [((1, -1), 0.8), ((0, -1), 0.5)], + 2: [((2, -1), 0.8), ((1, -2), -0.6)]} + >>> data, _ = toys.var_process(links_coeffs, T=1000) + >>> # Data must be array of shape (time, variables) + >>> print (data.shape) + (1000, 3) + >>> dataframe = pp.DataFrame(data) + >>> cond_ind_test = ParCorr() + >>> pcmci = PCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test) + >>> results = pcmci.run_pcmci(tau_max=2, pc_alpha=None) + >>> pcmci.print_significant_links(p_matrix=results['p_matrix'], + val_matrix=results['val_matrix'], + alpha_level=0.05) + ## Significant parents at alpha = 0.05: + + Variable 0 has 1 link(s): + (0 -1): pval = 0.00000 | val = 0.588 + + Variable 1 has 2 link(s): + (1 -1): pval = 0.00000 | val = 0.606 + (0 -1): pval = 0.00000 | val = 0.447 + + Variable 2 has 2 link(s): + (2 -1): pval = 0.00000 | val = 0.618 + (1 -2): pval = 0.00000 | val = -0.499 + + + Parameters + ---------- + selected_links : dict or None + Dictionary of form {0: [(3, -2), ...], 1:[], ...} + specifying whether only selected links should be tested. If None is + passed, all links are tested. + tau_min : int, optional (default: 0) + Minimum time lag to test. Note that zero-lags are undirected. + tau_max : int, optional (default: 1) + Maximum time lag. Must be larger or equal to tau_min. + save_iterations : bool, optional (default: False) + Whether to save iteration step results such as conditions used. + pc_alpha : float, optional (default: 0.05) + Significance level in algorithm. + max_conds_dim : int, optional (default: None) + Maximum number of conditions to test. If None is passed, this number + is unrestricted. + max_combinations : int, optional (default: 1) + Maximum number of combinations of conditions of current cardinality + to test. Defaults to 1 for PC_1 algorithm. For original PC algorithm + a larger number, such as 10, can be used. + max_conds_py : int, optional (default: None) + Maximum number of conditions of Y to use. If None is passed, this + number is unrestricted. + max_conds_px : int, optional (default: None) + Maximum number of conditions of Z to use. If None is passed, this + number is unrestricted. + alpha_level : float, optional (default: 0.05) + Significance level at which the p_matrix is thresholded to + get graph. + fdr_method : str, optional (default: 'fdr_bh') + Correction method, currently implemented is Benjamini-Hochberg + False Discovery Rate method. + + Returns + ------- + graph : array of shape [N, N, tau_max+1] + Causal graph, see description above for interpretation. + val_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of test statistic values. + p_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of p-values, optionally adjusted if fdr_method is + not 'none'. + conf_matrix : array of shape [N, N, tau_max+1,2] + Estimated matrix of confidence intervals of test statistic values. + Only computed if set in cond_ind_test, where also the percentiles + are set. 
+ + """ + + # Get the parents from run_pc_stable + all_parents = self.run_pc_stable(selected_links=selected_links, + tau_min=tau_min, + tau_max=tau_max, + save_iterations=save_iterations, + pc_alpha=pc_alpha, + max_conds_dim=max_conds_dim, + max_combinations=max_combinations) + # Get the results from run_mci, using the parents as the input + results = self.run_mci(selected_links=selected_links, + tau_min=tau_min, + tau_max=tau_max, + parents=all_parents, + max_conds_py=max_conds_py, + max_conds_px=max_conds_px, + alpha_level=alpha_level, + fdr_method=fdr_method) + + # Store the parents in the pcmci member + self.all_parents = all_parents + + # Print the information + # if self.verbosity > 0: + # self.print_results(results) + # Return the dictionary + self.results = results + return results
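+ # Illustrative sketch: run_pcmci with FDR correction of the p_matrix and a
+ # stricter alpha_level for the returned graph (reusing the pcmci instance
+ # from the run_pc_stable sketch above).
+ results = pcmci.run_pcmci(tau_max=2, pc_alpha=0.05,
+                           fdr_method='fdr_bh', alpha_level=0.01)
+ pcmci.print_significant_links(p_matrix=results['p_matrix'],
+                               val_matrix=results['val_matrix'],
+                               alpha_level=0.01)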
+ +
[docs] def run_pcmciplus(self, + selected_links=None, + tau_min=0, + tau_max=1, + pc_alpha=0.01, + contemp_collider_rule='majority', + conflict_resolution=True, + reset_lagged_links=False, + max_conds_dim=None, + max_conds_py=None, + max_conds_px=None, + max_conds_px_lagged=None, + fdr_method='none', + ): + """Runs PCMCIplus time-lagged and contemporaneous causal discovery for + time series. + + Method described in [5]_: + http://www.auai.org/~w-auai/uai2020/proceedings/579_main_paper.pdf + + Notes + ----- + + The PCMCIplus causal discovery method is described in [5]_, where + also analytical and numerical results are presented. In contrast to + PCMCI, PCMCIplus can identify the full, lagged and contemporaneous, + causal graph (up to the Markov equivalence class for contemporaneous + links) under the standard assumptions of Causal Sufficiency, + Faithfulness and the Markov condition. + + PCMCIplus estimates time-lagged and contemporaneous causal links by a + four-step procedure: + + 1. Condition-selection (same as for PCMCI): For each variable + :math:`j`, estimate a *superset* of lagged parents :math:`\\widehat{ + \\mathcal{B}}_t^-( X^j_t)` with the iterative PC1 algorithm, + implemented as ``run_pc_stable``. The condition-selection step + reduces the dimensionality and avoids conditioning on irrelevant + variables. + + 2. PC skeleton phase with contemporaneous conditions and *Momentary + conditional independence* (MCI) tests: Iterate through subsets + :math:`\\mathcal{S}` of contemporaneous adjacencies and conduct MCI + conditional independence tests: + + .. math:: X^i_{t-\\tau} ~\\perp~ X^j_{t} ~|~ \\mathcal{S}, + \\widehat{\\mathcal{B}}_t^-(X^j_t), + \\widehat{\\mathcal{B}}_{t-\\tau}^-(X^i_{t-{\\tau}}) + + here implemented as ``run_pcalg``. This step estimates the p-values and + test statistic values for all lagged and contemporaneous adjacencies + accounting for common drivers, indirect links, and autocorrelation. + + 3. PC collider orientation phase: Orient contemporaneous collider + motifs based on unshielded triples. Optionally apply conservative or + majority rule (also based on MCI tests). + + 4. PC rule orientation phase: Orient remaining contemporaneous + links based on PC rules. + + In contrast to PCMCI, the relevant output of PCMCIplus is the + array ``graph``. Its string entries are interpreted as follows: + + * ``graph[i,j,tau]=-->`` for :math:`\\tau>0` denotes a directed, lagged + causal link from :math:`i` to :math:`j` at lag :math:`\\tau` + + * ``graph[i,j,0]=-->`` (and ``graph[j,i,0]=<--``) denotes a directed, + contemporaneous causal link from :math:`i` to :math:`j` + + * ``graph[i,j,0]=o-o`` (and ``graph[j,i,0]=o-o``) denotes an unoriented, + contemporaneous adjacency between :math:`i` and :math:`j` indicating + that the collider and orientation rules could not be applied (Markov + equivalence) + + * ``graph[i,j,0]=x-x`` and (``graph[j,i,0]=x-x``) denotes a conflicting, + contemporaneous adjacency between :math:`i` and :math:`j` indicating + that the directionality is undecided due to conflicting orientation + rules + + Importantly, ``p_matrix`` and ``val_matrix`` for PCMCIplus quantify + the uncertainty and strength, respectively, only for the + adjacencies, but not for the directionality of contemporaneous links. + Note that lagged links are always oriented due to time order. + + PCMCIplus can be flexibly combined with any kind of conditional + independence test statistic adapted to the kind of data (continuous + or discrete) and its assumed dependency types. 
These are available in + ``tigramite.independence_tests``. + + The main free parameters of PCMCIplus (in addition to free parameters of + the conditional independence tests) are the maximum time delay + :math:`\\tau_{\\max}` (``tau_max``) and the significance threshold + :math:`\\alpha` ( ``pc_alpha``). + + If a list or None is passed for ``pc_alpha``, the significance level is + optimized for every graph across the given ``pc_alpha`` values using the + score computed in ``cond_ind_test.get_model_selection_criterion()``. + Since PCMCIplus outputs not a DAG, but an equivalence class of DAGs, + first one member of this class is computed and then the score is + computed as the average over all models fits for each variable in ``[0, + ..., N]`` for that member. The score is the same for all members of the + class. + + The maximum time delay depends on the application and should be chosen + according to the maximum causal time lag expected in the complex system. + We recommend a rather large choice that includes peaks in the + ``get_lagged_dependencies`` function. Another important parameter is + ``contemp_collider_rule``. Only if set to ``majority`` or + ``conservative'' and together with ``conflict_resolution=True``, + PCMCIplus is fully *order independent* meaning that the order of the N + variables in the dataframe does not matter. Last, the default option + ``reset_lagged_links=False`` restricts the detection of lagged causal + links in Step 2 to the significant adjacencies found in Step 1, given by + :math:`\\widehat{ \\mathcal{B}}_t^-( X^j_t)`. For + ``reset_lagged_links=True``, *all* lagged links are considered again, + which improves detection power for lagged links, but also leads to + larger runtimes. + + Further optional parameters are discussed in [5]_. + + Examples + -------- + >>> import numpy as np + >>> from tigramite.pcmci import PCMCI + >>> from tigramite.independence_tests import ParCorr + >>> import tigramite.data_processing as pp + >>> from tigramite.toymodels import structural_causal_processes as toys + >>> # Example process to play around with + >>> # Each key refers to a variable and the incoming links are supplied + >>> # as a list of format [((var, -lag), coeff, function), ...] + >>> def lin_f(x): return x + >>> links = {0: [((0, -1), 0.9, lin_f)], + 1: [((1, -1), 0.8, lin_f), ((0, -1), 0.8, lin_f)], + 2: [((2, -1), 0.7, lin_f), ((1, 0), 0.6, lin_f)], + 3: [((3, -1), 0.7, lin_f), ((2, 0), -0.5, lin_f)], + } + >>> data, nonstat = toys.structural_causal_process(links, + T=1000, seed=7) + >>> # Data must be array of shape (time, variables) + >>> print (data.shape) + (1000, 4) + >>> dataframe = pp.DataFrame(data) + >>> cond_ind_test = ParCorr() + >>> pcmci = PCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test) + >>> results = pcmci.run_pcmciplus(tau_min=0, tau_max=2, pc_alpha=0.01) + >>> pcmci.print_results(results, alpha_level=0.01) + ## Significant links at alpha = 0.01: + + Variable 0 has 1 link(s): + (0 -1): pval = 0.00000 | val = 0.676 + + Variable 1 has 2 link(s): + (1 -1): pval = 0.00000 | val = 0.602 + (0 -1): pval = 0.00000 | val = 0.599 + + Variable 2 has 2 link(s): + (1 0): pval = 0.00000 | val = 0.486 + (2 -1): pval = 0.00000 | val = 0.466 + + Variable 3 has 2 link(s): + (3 -1): pval = 0.00000 | val = 0.524 + (2 0): pval = 0.00000 | val = -0.449 + + Parameters + ---------- + selected_links : dict or None + Dictionary of form {0: [(3, 0), (0, -1), ...], 1:[], ...} + specifying whether only selected links should be tested. 
If None is + passed, all links are tested. + tau_min : int, optional (default: 0) + Minimum time lag to test. + tau_max : int, optional (default: 1) + Maximum time lag. Must be larger or equal to tau_min. + pc_alpha : float or list of floats, default: 0.01 + Significance level in algorithm. If a list or None is passed, the + pc_alpha level is optimized for every graph across the given + pc_alpha values ([0.001, 0.005, 0.01, 0.025, 0.05] for None) using + the score computed in cond_ind_test.get_model_selection_criterion(). + contemp_collider_rule : {'majority', 'conservative', 'none'} + Rule for collider phase to use. See the paper for details. Only + 'majority' and 'conservative' lead to an order-independent + algorithm. + conflict_resolution : bool, optional (default: True) + Whether to mark conflicts in orientation rules. Only for True + this leads to an order-independent algorithm. + reset_lagged_links : bool, optional (default: False) + Restricts the detection of lagged causal links in Step 2 to the + significant adjacencies found in the PC1 algorithm in Step 1. For + True, *all* lagged links are considered again, which improves + detection power for lagged links, but also leads to larger + runtimes. + max_conds_dim : int, optional (default: None) + Maximum number of conditions to test. If None is passed, this number + is unrestricted. + max_conds_py : int, optional (default: None) + Maximum number of lagged conditions of Y to use in MCI tests. If + None is passed, this number is unrestricted. + max_conds_px : int, optional (default: None) + Maximum number of lagged conditions of X to use in MCI tests. If + None is passed, this number is unrestricted. + max_conds_px_lagged : int, optional (default: None) + Maximum number of lagged conditions of X when X is lagged in MCI + tests. If None is passed, this number is equal to max_conds_px. + fdr_method : str, optional (default: 'none') + Correction method, default is Benjamini-Hochberg False Discovery + Rate method. + + Returns + ------- + graph : array of shape [N, N, tau_max+1] + Resulting causal graph, see description above for interpretation. + val_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of test statistic values regarding adjacencies. + p_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of p-values regarding adjacencies. + sepset : dictionary + Separating sets. See paper for details. + ambiguous_triples : list + List of ambiguous triples, only relevant for 'majority' and + 'conservative' rules, see paper for details. 
+ """ + + # Check if pc_alpha is chosen to optimze over a list + if pc_alpha is None or isinstance(pc_alpha, (list, tuple, np.ndarray)): + # Call optimizer wrapper around run_pcmciplus() + return self._optimize_pcmciplus_alpha( + selected_links=selected_links, + tau_min=tau_min, + tau_max=tau_max, + pc_alpha=pc_alpha, + contemp_collider_rule=contemp_collider_rule, + conflict_resolution=conflict_resolution, + reset_lagged_links=reset_lagged_links, + max_conds_dim=max_conds_dim, + max_conds_py=max_conds_py, + max_conds_px=max_conds_px, + max_conds_px_lagged=max_conds_px_lagged, + fdr_method=fdr_method) + + # else: + # raise ValueError("pc_alpha=None not supported in PCMCIplus, choose" + # " 0 < pc_alpha < 1 (e.g., 0.01)") + + # For the lagged PC algorithm only the strongest conditions are tested + max_combinations = 1 + + # Check the limits on tau + self._check_tau_limits(tau_min, tau_max) + # Set the selected links + _int_sel_links = self._set_sel_links(selected_links, tau_min, tau_max) + + # Step 1: Get a superset of lagged parents from run_pc_stable + lagged_parents = self.run_pc_stable(selected_links=selected_links, + tau_min=tau_min, + tau_max=tau_max, + pc_alpha=pc_alpha, + max_conds_dim=max_conds_dim, + max_combinations=max_combinations) + + p_matrix = self.p_matrix + val_matrix = self.val_matrix + + # Step 2+3+4: PC algorithm with contemp. conditions and MCI tests + if self.verbosity > 0: + print("\n##\n## Step 2: PC algorithm with contemp. conditions " + "and MCI tests\n##" + "\n\nParameters:") + if selected_links is not None: + print("\nselected_links = %s" % _int_sel_links) + print("\nindependence test = %s" % self.cond_ind_test.measure + + "\ntau_min = %d" % tau_min + + "\ntau_max = %d" % tau_max + + "\npc_alpha = %s" % pc_alpha + + "\ncontemp_collider_rule = %s" % contemp_collider_rule + + "\nconflict_resolution = %s" % conflict_resolution + + "\nreset_lagged_links = %s" % reset_lagged_links + + "\nmax_conds_dim = %s" % max_conds_dim + + "\nmax_conds_py = %s" % max_conds_py + + "\nmax_conds_px = %s" % max_conds_px + + "\nmax_conds_px_lagged = %s" % max_conds_px_lagged + + "\nfdr_method = %s" % fdr_method + ) + + # Set the maximum condition dimension for Y and X + max_conds_py = self._set_max_condition_dim(max_conds_py, + tau_min, tau_max) + max_conds_px = self._set_max_condition_dim(max_conds_px, + tau_min, tau_max) + + if reset_lagged_links: + # Run PCalg on full graph, ignoring that some lagged links + # were determined as non-significant in PC1 step + links_for_pc = deepcopy(_int_sel_links) + else: + # Run PCalg only on lagged parents found with PC1 + # plus all contemporaneous links + links_for_pc = deepcopy(lagged_parents) + for j in range(self.N): + for link in _int_sel_links[j]: + i, tau = link + if abs(tau) == 0: + links_for_pc[j].append((i, 0)) + + results = self.run_pcalg( + selected_links=links_for_pc, + pc_alpha=pc_alpha, + tau_min=tau_min, + tau_max=tau_max, + max_conds_dim=max_conds_dim, + max_combinations=None, + lagged_parents=lagged_parents, + max_conds_py=max_conds_py, + max_conds_px=max_conds_px, + max_conds_px_lagged=max_conds_px_lagged, + mode='contemp_conds', + contemp_collider_rule=contemp_collider_rule, + conflict_resolution=conflict_resolution) + + graph = results['graph'] + + # Update p_matrix and val_matrix with values from links_for_pc + for j in range(self.N): + for link in links_for_pc[j]: + i, tau = link + p_matrix[i, j, abs(tau)] = results['p_matrix'][i, j, abs(tau)] + val_matrix[i, j, abs(tau)] = results['val_matrix'][i, j, + abs(tau)] + + # 
Update p_matrix and val_matrix for indices of symmetrical links + p_matrix[:, :, 0] = results['p_matrix'][:, :, 0] + val_matrix[:, :, 0] = results['val_matrix'][:, :, 0] + + ambiguous = results['ambiguous_triples'] + + conf_matrix = None + # TODO: implement confidence estimation, but how? + # if self.cond_ind_test.confidence is not False: + # conf_matrix = results['conf_matrix'] + + # Correct the p_matrix if there is a fdr_method + if fdr_method != 'none': + p_matrix = self.get_corrected_pvalues(p_matrix=p_matrix, tau_min=tau_min, + tau_max=tau_max, + selected_links=_int_sel_links, + fdr_method=fdr_method) + + # Store the parents in the pcmci member + self.all_lagged_parents = lagged_parents + + # Cache the resulting values in the return dictionary + return_dict = {'graph': graph, + 'val_matrix': val_matrix, + 'p_matrix': p_matrix, + 'ambiguous_triples': ambiguous, + 'conf_matrix': conf_matrix} + # Print the results + if self.verbosity > 0: + self.print_results(return_dict, alpha_level=pc_alpha) + # Return the dictionary + self.results = return_dict + return return_dict
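+ # Illustrative sketch: run PCMCIplus and inspect the string entries of the
+ # contemporaneous graph slice, as described in the notes above (reusing the
+ # pcmci instance from the run_pc_stable sketch; with pure noise data
+ # typically no links are found).
+ results_plus = pcmci.run_pcmciplus(tau_min=0, tau_max=2, pc_alpha=0.01)
+ contemp = results_plus['graph'][:, :, 0]
+ print(np.argwhere(contemp == 'o-o'))   # unoriented contemporaneous adjacencies
+ print(np.argwhere(contemp == 'x-x'))   # conflicting orientations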
+ +
[docs] def run_pcalg(self, selected_links=None, pc_alpha=0.01, tau_min=0, + tau_max=1, max_conds_dim=None, max_combinations=None, + lagged_parents=None, max_conds_py=None, max_conds_px=None, + max_conds_px_lagged=None, + mode='standard', contemp_collider_rule='majority', + conflict_resolution=True): + + """Runs PC algorithm for time-lagged and contemporaneous causal + discovery for time series. + + For ``mode='contemp_conds'`` this implements Steps 2-4 of the + PCMCIplus method described in [5]_. For ``mode='standard'`` this + implements the standard PC algorithm adapted to time series. + + Parameters + ---------- + selected_links : dict or None + Dictionary of form {0: [(3, 0), (0, -1), ...], 1:[], ...} + specifying whether only selected links should be tested. If None is + passed, all links are tested. + lagged_parents : dictionary + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing + additional conditions for each CI test. As part of PCMCIplus + these are the superset of lagged parents estimated with the PC1 + algorithm. + mode : {'standard', 'contemp_conds'} + For ``mode='contemp_conds'`` this implements Steps 2-4 of the + PCMCIplus method. For ``mode='standard'`` this implements the + standard PC algorithm adapted to time series. + tau_min : int, optional (default: 0) + Minimum time lag to test. + tau_max : int, optional (default: 1) + Maximum time lag. Must be larger or equal to tau_min. + pc_alpha : float, optional (default: 0.01) + Significance level. + contemp_collider_rule : {'majority', 'conservative', 'none'} + Rule for collider phase to use. See the paper for details. Only + 'majority' and 'conservative' lead to an order-independent + algorithm. + conflict_resolution : bool, optional (default: True) + Whether to mark conflicts in orientation rules. Only for True + this leads to an order-independent algorithm. + max_conds_dim : int, optional (default: None) + Maximum number of conditions to test. If None is passed, this number + is unrestricted. + max_combinations : int + Maximum number of combinations of conditions of current cardinality + to test. + max_conds_py : int, optional (default: None) + Maximum number of lagged conditions of Y to use in MCI tests. If + None is passed, this number is unrestricted. + max_conds_px : int, optional (default: None) + Maximum number of lagged conditions of X to use in MCI tests. If + None is passed, this number is unrestricted. + max_conds_px_lagged : int, optional (default: None) + Maximum number of lagged conditions of X when X is lagged in MCI + tests. If None is passed, this number is equal to max_conds_px. + + Returns + ------- + graph : array of shape [N, N, tau_max+1] + Resulting causal graph, see description above for interpretation. + val_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of test statistic values regarding adjacencies. + p_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of p-values regarding adjacencies. + sepset : dictionary + Separating sets. See paper for details. + ambiguous_triples : list + List of ambiguous triples, only relevant for 'majority' and + 'conservative' rules, see paper for details. 
+ """ + # TODO: save_iterations + + # Sanity checks + if pc_alpha is None: + raise ValueError("pc_alpha=None not supported in PC algorithm, " + "choose 0 < pc_alpha < 1 (e.g., 0.01)") + + if mode not in ['contemp_conds', 'standard']: + raise ValueError("mode must be either 'contemp_conds' or " + "'standard'") + + # Check the limits on tau + self._check_tau_limits(tau_min, tau_max) + # Set the selected links + _int_sel_links = self._set_sel_links(selected_links, tau_min, tau_max) + + if max_conds_dim is None: + if mode == 'standard': + max_conds_dim = self._set_max_condition_dim(max_conds_dim, + tau_min, tau_max) + elif mode == 'contemp_conds': + max_conds_dim = self.N + + if max_combinations is None: + max_combinations = np.inf + + # Initialize initial graph from selected_links + initial_graph = np.zeros((self.N, self.N, tau_max + 1), dtype='uint8') + for j in range(self.N): + for link in _int_sel_links[j]: + i, tau = link + initial_graph[i, j, abs(tau)] = 1 + + skeleton_results = self._pcalg_skeleton( + initial_graph=initial_graph, + lagged_parents=lagged_parents, + mode=mode, + pc_alpha=pc_alpha, + tau_min=tau_min, + tau_max=tau_max, + max_conds_dim=max_conds_dim, + max_combinations=max_combinations, + max_conds_py=max_conds_py, + max_conds_px=max_conds_px, + max_conds_px_lagged=max_conds_px_lagged, + ) + + skeleton_graph = skeleton_results['graph'] + sepset = skeleton_results['sepset'] + + colliders_step_results = self._pcalg_colliders( + graph=skeleton_graph, + sepset=sepset, + lagged_parents=lagged_parents, + mode=mode, + pc_alpha=pc_alpha, + tau_max=tau_max, + max_conds_py=max_conds_py, + max_conds_px=max_conds_px, + max_conds_px_lagged=max_conds_px_lagged, + conflict_resolution=conflict_resolution, + contemp_collider_rule=contemp_collider_rule, + ) + + collider_graph = colliders_step_results['graph'] + ambiguous_triples = colliders_step_results['ambiguous_triples'] + + final_graph = self._pcalg_rules_timeseries( + graph=collider_graph, + ambiguous_triples=ambiguous_triples, + conflict_resolution=conflict_resolution, + ) + + # Symmetrize p_matrix and val_matrix + symmetrized_results = self.symmetrize_p_and_val_matrix( + p_matrix=skeleton_results['p_matrix'], + val_matrix=skeleton_results['val_matrix'], + selected_links=_int_sel_links, + conf_matrix=None) + + # Convert numerical graph matrix to string + graph_str = self.convert_to_string_graph(final_graph) + + pc_results = { + 'graph': graph_str, + 'p_matrix': symmetrized_results['p_matrix'], + 'val_matrix': symmetrized_results['val_matrix'], + 'sepset': colliders_step_results['sepset'], + 'ambiguous_triples': colliders_step_results['ambiguous_triples'], + } + + if self.verbosity > 1: + print("\n-----------------------------") + print("PCMCIplus algorithm finished.") + print("-----------------------------") + + self.pc_results = pc_results + return pc_results
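+ # Illustrative sketch: the standard PC algorithm adapted to time series,
+ # i.e. mode='standard' without lagged-parent conditions (reusing the pcmci
+ # instance from the run_pc_stable sketch above).
+ results_pc = pcmci.run_pcalg(pc_alpha=0.01, tau_min=0, tau_max=2,
+                              mode='standard')
+ print(results_pc['graph'].shape)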
+ +
[docs] def run_pcalg_non_timeseries_data(self, pc_alpha=0.01, + max_conds_dim=None, max_combinations=None, + contemp_collider_rule='majority', + conflict_resolution=True): + + """Runs PC algorithm for non-time series data. + + Simply calls run_pcalg with tau_min = tau_max = 0. + Removes lags from ouput dictionaries. + + Parameters + ---------- + pc_alpha : float, optional (default: 0.01) + Significance level. + contemp_collider_rule : {'majority', 'conservative', 'none'} + Rule for collider phase to use. See the paper for details. Only + 'majority' and 'conservative' lead to an order-independent + algorithm. + conflict_resolution : bool, optional (default: True) + Whether to mark conflicts in orientation rules. Only for True + this leads to an order-independent algorithm. + max_conds_dim : int, optional (default: None) + Maximum number of conditions to test. If None is passed, this number + is unrestricted. + max_combinations : int + Maximum number of combinations of conditions of current cardinality + to test. + + Returns + ------- + graph : array of shape [N, N, 1] + Resulting causal graph, see description above for interpretation. + val_matrix : array of shape [N, N, 1] + Estimated matrix of test statistic values regarding adjacencies. + p_matrix : array of shape [N, N, 1] + Estimated matrix of p-values regarding adjacencies. + sepset : dictionary + Separating sets. See paper for details. + ambiguous_triples : list + List of ambiguous triples, only relevant for 'majority' and + 'conservative' rules, see paper for details. + """ + + results = self.run_pcalg(pc_alpha=pc_alpha, tau_min=0, tau_max=0, + max_conds_dim=max_conds_dim, max_combinations=max_combinations, + mode='standard', contemp_collider_rule=contemp_collider_rule, + conflict_resolution=conflict_resolution) + + # Remove tau-dimension + # results['graph'] = results['graph'].squeeze() + # results['val_matrix'] = results['val_matrix'].squeeze() + # results['p_matrix'] = results['p_matrix'].squeeze() + old_sepsets = results['sepset'].copy() + results['sepset'] = {} + for old_sepset in old_sepsets: + new_sepset = (old_sepset[0][0], old_sepset[1]) + conds = [cond[0] for cond in old_sepsets[old_sepset]] + + results['sepset'][new_sepset] = conds + + ambiguous_triples = results['ambiguous_triples'].copy() + results['ambiguous_triples'] = [] + for triple in ambiguous_triples: + new_triple = (triple[0][0], triple[1], triple[2]) + + results['ambiguous_triples'].append(new_triple) + + self.pc_results = results + return results
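+ # Illustrative sketch: PC algorithm for non-time series data; lags are
+ # removed from the sepset and ambiguous_triples entries of the output
+ # (reusing the pcmci instance from the run_pc_stable sketch above).
+ results_pc0 = pcmci.run_pcalg_non_timeseries_data(pc_alpha=0.01)
+ print(results_pc0['graph'][:, :, 0])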
+ + + def _run_pcalg_test(self, i, abstau, j, S, lagged_parents, max_conds_py, + max_conds_px, max_conds_px_lagged, tau_max): + """MCI conditional independence tests within PCMCIplus or PC algorithm. + + Parameters + ---------- + i : int + Variable index. + abstau : int + Time lag (absolute value). + j : int + Variable index. + S : list + List of contemporaneous conditions. + lagged_parents : dictionary of lists + Dictionary of lagged parents for each node. + max_conds_py : int + Max number of lagged parents for node j. + max_conds_px : int + Max number of lagged parents for lagged node i. + max_conds_px_lagged : int + Maximum number of lagged conditions of X when X is lagged in MCI + tests. If None is passed, this number is equal to max_conds_px. + tau_max : int + Maximum time lag. + + Returns + ------- + val : float + Test statistic value. + pval : float + Test statistic p-value. + Z : list + List of conditions. + """ + + # Perform independence test adding lagged parents + if lagged_parents is not None: + conds_y = lagged_parents[j][:max_conds_py] + # Get the conditions for node i + if abstau == 0: + conds_x = lagged_parents[i][:max_conds_px] + else: + if max_conds_px_lagged is None: + conds_x = lagged_parents[i][:max_conds_px] + else: + conds_x = lagged_parents[i][:max_conds_px_lagged] + + else: + conds_y = conds_x = [] + # Shift the conditions for X by tau + conds_x_lagged = [(k, -abstau + k_tau) for k, k_tau in conds_x] + + Z = [node for node in S] + Z += [node for node in conds_y if + node != (i, -abstau) and node not in Z] + # Remove overlapping nodes between conds_x_lagged and conds_y + Z += [node for node in conds_x_lagged if node not in Z] + + val, pval = self.cond_ind_test.run_test(X=[(i, -abstau)], Y=[(j, 0)], + Z=Z, tau_max=tau_max, + # verbosity=self.verbosity + ) + + return val, pval, Z + + def _print_triple_info(self, triple, index, n_triples): + """Print info about the current triple being tested. + + Parameters + ---------- + triple : tuple + Standard ((i, tau), k, j) tuple of nodes and time delays. + index : int + Index of triple. + n_triples : int + Total number of triples. 
+ """ + (i, tau), k, j = triple + link_marker = {True:"o-o", False:"-->"} + + print("\n Triple (%s % d) %s %s o-o %s (%d/%d)" % ( + self.var_names[i], tau, link_marker[tau==0], self.var_names[k], + self.var_names[j], index + 1, n_triples)) + + + def _tests_remaining(self, i, j, abstau, graph, adjt, p): + """Helper function returning whether a certain pair still needs to be + tested.""" + return graph[i, j, abstau] != 0 and len( + [a for a in adjt[j] if a != (i, -abstau)]) >= p + + def _any_tests_remaining(self, graph, adjt, tau_min, tau_max, p): + """Helper function returning whether any pair still needs to be + tested.""" + remaining_pairs = self._remaining_pairs(graph, adjt, tau_min, tau_max, + p) + + if len(remaining_pairs) > 0: + return True + else: + return False + + def _remaining_pairs(self, graph, adjt, tau_min, tau_max, p): + """Helper function returning the remaining pairs that still need to be + tested.""" + N = graph.shape[0] + pairs = [] + for (i, j) in itertools.product(range(N), range(N)): + for abstau in range(tau_min, tau_max + 1): + if (graph[i, j, abstau] != 0 + and len( + [a for a in adjt[j] if a != (i, -abstau)]) >= p): + pairs.append((i, j, abstau)) + + return pairs + + def _pcalg_skeleton(self, + initial_graph, + lagged_parents, + mode, + pc_alpha, + tau_min, + tau_max, + max_conds_dim, + max_combinations, + max_conds_py, + max_conds_px, + max_conds_px_lagged, + ): + """Implements the skeleton discovery step of the PC algorithm for + time series. + + Parameters + ---------- + initial_graph : array of shape (N, N, tau_max+1) or None + Initial graph. + lagged_parents : dictionary + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing + additional conditions for each CI test. As part of PCMCIplus + these are the superset of lagged parents estimated with the PC1 + algorithm. + mode : {'standard', 'contemp_conds'} + For ``mode='contemp_conds'`` this implements Steps 2-4 of the + PCMCIplus method. For ``mode='standard'`` this implements the + standard PC algorithm adapted to time series. + tau_min : int, optional (default: 0) + Minimum time lag to test. + tau_max : int, optional (default: 1) + Maximum time lag. Must be larger or equal to tau_min. + pc_alpha : float, optional (default: 0.01) + Significance level. + max_conds_dim : int, optional (default: None) + Maximum number of conditions to test. If None is passed, this number + is unrestricted. + max_combinations : int + Maximum number of combinations of conditions of current cardinality + to test. + max_conds_py : int, optional (default: None) + Maximum number of lagged conditions of Y to use in MCI tests. If + None is passed, this number is unrestricted. + max_conds_px : int, optional (default: None) + Maximum number of lagged conditions of X to use in MCI tests. If + None is passed, this number is unrestricted. + max_conds_px_lagged : int, optional (default: None) + Maximum number of lagged conditions of X when X is lagged in MCI + tests. If None is passed, this number is equal to max_conds_px. + + Returns + ------- + graph : array of shape [N, N, tau_max+1] + Resulting causal graph, see description above for interpretation. + val_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of test statistic values regarding adjacencies. + p_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of p-values regarding adjacencies. + sepset : dictionary + Separating sets. See paper for details. 
+ """ + N = self.N + + # Form complete graph + if initial_graph is None: + graph = np.ones((N, N, tau_max + 1), dtype='int') + else: + graph = initial_graph + + # Remove lag-zero self-loops + graph[range(N), range(N), 0] = 0 + + # Define adjacencies for standard and contemp_conds mode + if mode == 'contemp_conds': + adjt = self._get_adj_time_series_contemp(graph) + elif mode == 'standard': + adjt = self._get_adj_time_series(graph) + + val_matrix = np.zeros((N, N, tau_max + 1)) + val_min = dict() + for j in range(self.N): + val_min[j] = {(p[0], -p[1]): np.inf + for p in zip(*np.where(graph[:, j, :]))} + + # Initialize p-values. Set to 1 if there's no link in the initial graph + pvalues = np.zeros((N, N, tau_max + 1)) + pvalues[graph == 0] = 1. + pval_max = dict() + for j in range(self.N): + pval_max[j] = {(p[0], -p[1]): 0. + for p in zip(*np.where(graph[:, j, :]))} + + # TODO: Remove sepset alltogether? + # Intialize sepsets that store the conditions that make i and j + # independent + sepset = self._get_sepset(tau_min, tau_max) + + if self.verbosity > 1: + print("\n--------------------------") + print("Skeleton discovery phase") + print("--------------------------") + + # Start with zero cardinality conditions + p = 0 + while (self._any_tests_remaining(graph, adjt, tau_min, tau_max, + p) and p <= max_conds_dim): + if self.verbosity > 1: + print( + "\nTesting contemporaneous condition sets of dimension " + "%d: " % p) + + remaining_pairs = self._remaining_pairs(graph, adjt, tau_min, + tau_max, p) + n_remaining = len(remaining_pairs) + for ir, (i, j, abstau) in enumerate(remaining_pairs): + # Check if link was not already removed (contemp links) + if graph[i, j, abstau]: + if self.verbosity > 1: + self._print_link_info(j=j, index_parent=ir, + parent=(i, -abstau), + num_parents=n_remaining) + + # Generate all subsets of conditions of cardinality p + conditions = list(itertools.combinations( + [(k, tauk) for (k, tauk) in adjt[j] + if not (k == i and tauk == -abstau)], p)) + + n_conditions = len(conditions) + if self.verbosity > 1: + print( + " Iterate through %d subset(s) of conditions: " + % n_conditions) + if lagged_parents is not None: + self._print_pcmciplus_conditions(lagged_parents, i, + j, abstau, + max_conds_py, + max_conds_px, + max_conds_px_lagged) + nonsig = False + # Iterate through condition sets + for q, S in enumerate(conditions): + if q > max_combinations: + break + + # Run MCI test + val, pval, Z = self._run_pcalg_test( + i, abstau, j, S, lagged_parents, max_conds_py, + max_conds_px, max_conds_px_lagged, tau_max) + + # Store minimum test statistic value for sorting adjt + # (only internally used) + val_min[j][(i, -abstau)] = min(np.abs(val), + val_min[j].get( + (i, -abstau))) + # Store maximum p-value (only internally used) + pval_max[j][(i, -abstau)] = max(pval, + pval_max[j].get( + (i, -abstau))) + + # Store max. 
p-value and corresponding value to return + if pval >= pvalues[i, j, abstau]: + pvalues[i, j, abstau] = pval + val_matrix[i, j, abstau] = val + + if self.verbosity > 1: + self._print_cond_info(Z=S, comb_index=q, pval=pval, + val=val) + + # If conditional independence is found, remove link + # from graph and store sepset + if pval > pc_alpha: + nonsig = True + if abstau == 0: + graph[i, j, 0] = graph[j, i, 0] = 0 + sepset[((i, 0), j)] = sepset[ + ((j, 0), i)] = list(S) + else: + graph[i, j, abstau] = 0 + sepset[((i, -abstau), j)] = list(S) + break + + # Print the results if needed + if self.verbosity > 1: + self._print_a_pc_result(nonsig, + conds_dim=p, + max_combinations= + max_combinations) + else: + self._print_link_info(j=j, index_parent=ir, + parent=(i, -abstau), + num_parents=n_remaining, + already_removed=True) + + # Increase condition cardinality + p += 1 + + # Re-compute adj and sort by minimum absolute test statistic value + if mode == 'contemp_conds': + adjt = self._get_adj_time_series_contemp(graph, sort_by=val_min) + elif mode == 'standard': + adjt = self._get_adj_time_series(graph, sort_by=val_min) + + if self.verbosity > 1: + print("\nUpdated contemp. adjacencies:") + self._print_parents(all_parents=adjt, val_min=val_min, + pval_max=pval_max) + + if self.verbosity > 1: + if not (self._any_tests_remaining(graph, adjt, tau_min, tau_max, + p) and p <= max_conds_dim): + print("\nAlgorithm converged at p = %d." % (p - 1)) + else: + print( + "\nAlgorithm not yet converged, but max_conds_dim = %d" + " reached." % max_conds_dim) + + return {'graph': graph, + 'sepset': sepset, + 'p_matrix': pvalues, + 'val_matrix': val_matrix, + } + + def _get_adj_time_series(self, graph, include_conflicts=True, sort_by=None): + """Helper function that returns dictionary of adjacencies from graph. + + Parameters + ---------- + graph : array of shape [N, N, tau_max+1] + Resulting causal graph, see description above for interpretation. + include_conflicts : bool, optional (default: True) + Whether conflicting links (marked as 2 in graph) should be returned. + sort_by : dict or none, optional (default: None) + If not None, the adjacencies are sorted by the absolute values of + the corresponding entries. + + Returns + ------- + adjt : dictionary + Adjacency dictionary. + """ + N, N, tau_max_plusone = graph.shape + adjt = {} + if include_conflicts: + for j in range(N): + where = np.where(graph[:, j, :] != 0) + adjt[j] = list(zip(*(where[0], -where[1]))) + else: + for j in range(N): + where = np.where(graph[:, j, :] == 1) + adjt[j] = list(zip(*(where[0], -where[1]))) + + if sort_by is not None: + for j in range(N): + # Get the absolute value for all the test statistics + abs_values = {k: np.abs(sort_by[j][k]) for k in list(sort_by[j]) + if k in adjt[j]} + adjt[j] = sorted(abs_values, key=abs_values.get, reverse=True) + + return adjt + + def _get_adj_time_series_contemp(self, graph, include_conflicts=True, + sort_by=None): + """Helper function that returns dictionary of contemporaneous + adjacencies from graph. + + Parameters + ---------- + graph : array of shape [N, N, tau_max+1] + Resulting causal graph, see description above for interpretation. + include_conflicts : bool, optional (default: True) + Whether conflicting links (marked as 2 in graph) should be returned. + sort_by : dict or none, optional (default: None) + If not None, the adjacencies are sorted by the absolute values of + the corresponding entries. + + Returns + ------- + adjt : dictionary + Contemporaneous adjacency dictionary. 
+ """ + N, N, tau_max_plusone = graph.shape + adjt = self._get_adj_time_series(graph, + include_conflicts=include_conflicts, + sort_by=sort_by) + for j in range(N): + adjt[j] = [a for a in adjt[j] if a[1] == 0] + # adjt[j] = list(np.where(graph[:,j,0] != 0)[0]) + + return adjt + + def _get_sepset(self, tau_min, tau_max): + """Returns initial sepset. + + Parameters + ---------- + tau_min : int, optional (default: 0) + Minimum time lag to test. + tau_max : int, optional (default: 1) + Maximum time lag. Must be larger or equal to tau_min. + + Returns + ------- + sepset : dict + Initialized sepset. + """ + sepset = dict([(((i, -tau), j), []) + for tau in range(tau_min, tau_max + 1) + for i in range(self.N) + for j in range(self.N)]) + + return sepset + + def _find_unshielded_triples(self, graph): + """Find unshielded triples i_tau o-(>) k_t o-o j_t with i_tau -/- j_t. + + Excludes conflicting links. + + Parameters + ---------- + graph : array of shape [N, N, tau_max+1] + Causal graph, see description above for interpretation. + + Returns + ------- + triples : list + List of triples. + """ + + N = graph.shape[0] + adjt = self._get_adj_time_series(graph, include_conflicts=False) + + # Find unshielded triples + # Find triples i_tau o-(>) k_t o-o j_t with i_tau -/- j_t + triples = [] + for j in range(N): + for (k, tauk) in adjt[j]: + if tauk == 0: + for (i, taui) in adjt[k]: + if not (k == j or ( + taui == 0 and (i == k or i == j))): + if ((taui == 0 and graph[i, j, 0] == 0 and + graph[j, i, 0] == 0 and graph[j, k, 0] == 1) + or (taui < 0 and graph[j, k, 0] == 1 + and graph[i, j, abs(taui)] == 0)): + triples.append(((i, taui), k, j)) + + return triples + + def _pcalg_colliders(self, + graph, + sepset, + lagged_parents, + mode, + pc_alpha, + tau_max, + max_conds_py, + max_conds_px, + max_conds_px_lagged, + contemp_collider_rule, + conflict_resolution, + ): + """Implements the collider orientation step of the PC algorithm for + time series. + + Parameters + ---------- + graph : array of shape (N, N, tau_max+1) + Current graph. + sepset : dictionary + Separating sets. See paper for details. + lagged_parents : dictionary + Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing + additional conditions for each CI test. As part of PCMCIplus + these are the superset of lagged parents estimated with the PC1 + algorithm. + mode : {'standard', 'contemp_conds'} + For ``mode='contemp_conds'`` this implements Steps 2-4 of the + PCMCIplus method. For ``mode='standard'`` this implements the + standard PC algorithm adapted to time series. + pc_alpha : float, optional (default: 0.01) + Significance level. + tau_max : int, optional (default: 1) + Maximum time lag. Must be larger or equal to tau_min. + max_conds_py : int, optional (default: None) + Maximum number of lagged conditions of Y to use in MCI tests. If + None is passed, this number is unrestricted. + max_conds_px : int, optional (default: None) + Maximum number of lagged conditions of X to use in MCI tests. If + None is passed, this number is unrestricted. + max_conds_px_lagged : int, optional (default: None) + Maximum number of lagged conditions of X when X is lagged in MCI + tests. If None is passed, this number is equal to max_conds_px. + contemp_collider_rule : {'majority', 'conservative', 'none'} + Rule for collider phase to use. See the paper for details. Only + 'majority' and 'conservative' lead to an order-independent + algorithm. + conflict_resolution : bool, optional (default: True) + Whether to mark conflicts in orientation rules. 
Only for True + this leads to an order-independent algorithm. + + Returns + ------- + graph : array of shape [N, N, tau_max+1] + Resulting causal graph, see description above for interpretation. + sepset : dictionary + Separating sets. See paper for details. + ambiguous_triples : list + List of ambiguous triples, only relevant for 'majority' and + 'conservative' rules, see paper for details. + """ + + if self.verbosity > 1: + print("\n----------------------------") + print("Collider orientation phase") + print("----------------------------") + print("\ncontemp_collider_rule = %s" % contemp_collider_rule) + print("conflict_resolution = %s\n" % conflict_resolution) + + # Find unshielded triples + triples = self._find_unshielded_triples(graph) + + v_structures = [] + ambiguous_triples = [] + + if contemp_collider_rule is None or contemp_collider_rule == 'none': + # Standard collider orientation rule of PC algorithm + # If k_t not in sepset(i_tau, j_t), then orient + # as i_tau --> k_t <-- j_t + for itaukj in triples: + (i, tau), k, j = itaukj + if (k, 0) not in sepset[((i, tau), j)]: + v_structures.append(itaukj) + else: + # Apply 'majority' or 'conservative' rule to orient colliders + # Compute all (contemp) subsets of potential parents of i and all + # subsets of potential parents of j that make i and j independent + def subsets(s): + if len(s) == 0: return [] + subsets = [] + for cardinality in range(len(s) + 1): + subsets += list(itertools.combinations(s, cardinality)) + subsets = [list(sub) for sub in list(set(subsets))] + return subsets + + # We only consider contemporaneous adjacencies because only these + # can include the (contemp) k. Furthermore, we only need to check + # adjacencies of i for tau=0 + if mode == 'contemp_conds': + adjt = self._get_adj_time_series_contemp(graph) + elif mode == 'standard': + adjt = self._get_adj_time_series(graph) + + n_triples = len(triples) + for ir, itaukj in enumerate(triples): + (i, tau), k, j = itaukj + + if self.verbosity > 1: + self._print_triple_info(itaukj, ir, n_triples) + + neighbor_subsets_tmp = subsets( + [(l, taul) for (l, taul) in adjt[j] + if not (l == i and tau == taul)]) + if tau == 0: + # Furthermore, we only need to check contemp. 
adjacencies + # of i for tau=0 + neighbor_subsets_tmp += subsets( + [(l, taul) for (l, taul) in adjt[i] + if not (l == j and taul == 0)]) + + # Make unique + neighbor_subsets = [] + for subset in neighbor_subsets_tmp: + if subset not in neighbor_subsets: + neighbor_subsets.append(subset) + + n_neighbors = len(neighbor_subsets) + + if self.verbosity > 1: + print( + " Iterate through %d condition subset(s) of " + "neighbors: " % n_neighbors) + if lagged_parents is not None: + self._print_pcmciplus_conditions(lagged_parents, i, j, + abs(tau), max_conds_py, max_conds_px, + max_conds_px_lagged) + + # Test which neighbor subsets separate i and j + neighbor_sepsets = [] + for iss, S in enumerate(neighbor_subsets): + val, pval, Z = self._run_pcalg_test( + i, abs(tau), j, S, lagged_parents, max_conds_py, + max_conds_px, max_conds_px_lagged, tau_max) + + if self.verbosity > 1: + self._print_cond_info(Z=S, comb_index=iss, pval=pval, + val=val) + + if pval > pc_alpha: + neighbor_sepsets += [S] + + if len(neighbor_sepsets) > 0: + fraction = np.sum( + [(k, 0) in S for S in neighbor_sepsets]) / float( + len(neighbor_sepsets)) + + if contemp_collider_rule == 'conservative': + # Triple is labeled as unambiguous if at least one + # separating set is found and either k is in ALL + # (fraction == 1) or NONE (fraction == 0) of them + if len(neighbor_sepsets) == 0: + if self.verbosity > 1: + print( + " No separating subsets --> ambiguous " + "triple found") + ambiguous_triples.append(itaukj) + else: + if fraction == 0: + # If (k, 0) is in none of the neighbor_sepsets, + # orient as collider + v_structures.append(itaukj) + if self.verbosity > 1: + print( + " Fraction of separating subsets " + "containing (%s 0) is = 0 --> collider " + "found" % self.var_names[k]) + # Also delete (k, 0) from sepset (if present) + if (k, 0) in sepset[((i, tau), j)]: + sepset[((i, tau), j)].remove((k, 0)) + if tau == 0: + if (k, 0) in sepset[((j, tau), i)]: + sepset[((j, tau), i)].remove((k, 0)) + elif fraction == 1: + # If (k, 0) is in all of the neighbor_sepsets, + # leave unoriented + if self.verbosity > 1: + print( + " Fraction of separating subsets " + "containing (%s 0) is = 1 --> " + "non-collider found" % self.var_names[k]) + # Also add (k, 0) to sepset (if not present) + if (k, 0) not in sepset[((i, tau), j)]: + sepset[((i, tau), j)].append((k, 0)) + if tau == 0: + if (k, 0) not in sepset[((j, tau), i)]: + sepset[((j, tau), i)].append((k, 0)) + else: + if self.verbosity > 1: + print( + " Fraction of separating subsets " + "containing (%s 0) is = between 0 and 1 " + "--> ambiguous triple found" % + self.var_names[k]) + ambiguous_triples.append(itaukj) + + elif contemp_collider_rule == 'majority': + + if len(neighbor_sepsets) == 0: + if self.verbosity > 1: + print( + " No separating subsets --> ambiguous " + "triple found") + ambiguous_triples.append(itaukj) + else: + if fraction == 0.5: + if self.verbosity > 1: + print( + " Fraction of separating subsets " + "containing (%s 0) is = 0.5 --> ambiguous " + "triple found" % self.var_names[k]) + ambiguous_triples.append(itaukj) + elif fraction < 0.5: + v_structures.append(itaukj) + if self.verbosity > 1: + print( + " Fraction of separating subsets " + "containing (%s 0) is < 0.5 " + "--> collider found" % self.var_names[k]) + # Also delete (k, 0) from sepset (if present) + if (k, 0) in sepset[((i, tau), j)]: + sepset[((i, tau), j)].remove((k, 0)) + if tau == 0: + if (k, 0) in sepset[((j, tau), i)]: + sepset[((j, tau), i)].remove((k, 0)) + elif fraction > 0.5: + if self.verbosity 
> 1: + print( + " Fraction of separating subsets " + "containing (%s 0) is > 0.5 " + "--> non-collider found" % + self.var_names[k]) + # Also add (k, 0) to sepset (if not present) + if (k, 0) not in sepset[((i, tau), j)]: + sepset[((i, tau), j)].append((k, 0)) + if tau == 0: + if (k, 0) not in sepset[((j, tau), i)]: + sepset[((j, tau), i)].append((k, 0)) + + if self.verbosity > 1 and len(v_structures) > 0: + print("\nOrienting links among colliders:") + + link_marker = {True:"o-o", False:"-->"} + + # Now go through list of v-structures and (optionally) detect conflicts + oriented_links = [] + for itaukj in v_structures: + (i, tau), k, j = itaukj + + if self.verbosity > 1: + print("\n Collider (%s % d) %s %s o-o %s:" % ( + self.var_names[i], tau, link_marker[ + tau==0], self.var_names[k], + self.var_names[j])) + + if (k, j) not in oriented_links and (j, k) not in oriented_links: + if self.verbosity > 1: + print(" Orient %s o-o %s as %s --> %s " % ( + self.var_names[j], self.var_names[k], self.var_names[j], + self.var_names[k])) + graph[k, j, 0] = 0 + oriented_links.append((j, k)) + else: + if conflict_resolution is False and self.verbosity > 1: + print(" Already oriented") + + if conflict_resolution: + if (k, j) in oriented_links: + if self.verbosity > 1: + print( + " Conflict since %s <-- %s already " + "oriented: Mark link as `2` in graph" % ( + self.var_names[j], self.var_names[k])) + graph[j, k, 0] = graph[k, j, 0] = 2 + + if tau == 0: + if (i, k) not in oriented_links and ( + k, i) not in oriented_links: + if self.verbosity > 1: + print(" Orient %s o-o %s as %s --> %s " % ( + self.var_names[i], self.var_names[k], + self.var_names[i], self.var_names[k])) + graph[k, i, 0] = 0 + oriented_links.append((i, k)) + else: + if conflict_resolution is False and self.verbosity > 1: + print(" Already oriented") + + if conflict_resolution: + if (k, i) in oriented_links: + if self.verbosity > 1: + print( + " Conflict since %s <-- %s already " + "oriented: Mark link as `2` in graph" % ( + self.var_names[i], self.var_names[k])) + graph[i, k, 0] = graph[k, i, 0] = 2 + + if self.verbosity > 1: + adjt = self._get_adj_time_series(graph) + print("\nUpdated adjacencies:") + self._print_parents(all_parents=adjt, val_min=None, pval_max=None) + + return {'graph': graph, + 'sepset': sepset, + 'ambiguous_triples': ambiguous_triples, + } + + def _find_triples_rule1(self, graph): + """Find triples i_tau --> k_t o-o j_t with i_tau -/- j_t. + + Excludes conflicting links. + + Parameters + ---------- + graph : array of shape [N, N, tau_max+1] + Causal graph, see description above for interpretation. + + Returns + ------- + triples : list + List of triples. + """ + adjt = self._get_adj_time_series(graph, include_conflicts=False) + + N = graph.shape[0] + triples = [] + for j in range(N): + for (k, tauk) in adjt[j]: + if tauk == 0 and graph[j, k, 0] == 1: + for (i, taui) in adjt[k]: + if not (k == j or (taui == 0 and (i == k or i == j))): + if ((taui == 0 and graph[i, j, 0] == 0 + and graph[j, i, 0] == 0 + and graph[k, i, 0] == 0) + or taui < 0 and graph[ + i, j, abs(taui)] == 0): + triples.append(((i, taui), k, j)) + return triples + + def _find_triples_rule2(self, graph): + """Find triples i_t --> k_t --> j_t with i_t -- j_t. + + Excludes conflicting links. + + Parameters + ---------- + graph : array of shape [N, N, tau_max+1] + Causal graph, see description above for interpretation. + + Returns + ------- + triples : list + List of triples. 
+ """ + + adjtcont = self._get_adj_time_series_contemp(graph, + include_conflicts=False) + N = graph.shape[0] + + triples = [] + for j in range(N): + for (k, tauk) in adjtcont[j]: + if (j, 0) not in adjtcont[k]: + for (i, taui) in adjtcont[k]: + if (k, 0) not in adjtcont[i]: + if graph[i, j, 0] == 1 and graph[j, i, 0] == 1: + triples.append(((i, 0), k, j)) + return triples + + def _find_chains_rule3(self, graph): + """Find chains i_t o-o k_t --> j_t and i_t o-o l_t --> j_t with + i_t o-o j_t and k_t -/- l_t. + + Excludes conflicting links. + + Parameters + ---------- + graph : array of shape [N, N, tau_max+1] + Causal graph, see description above for interpretation. + + Returns + ------- + chains : list + List of chains. + """ + N = graph.shape[0] + adjtcont = self._get_adj_time_series_contemp(graph, + include_conflicts=False) + + chains = [] + for j in range(N): + for (i, _) in adjtcont[j]: + if graph[j, i, 0] == 1: + for (k, _) in adjtcont[j]: + for (l, _) in adjtcont[j]: + # Nodes should not be identical + if not ((k == l) or (k == i) or (l == i)): + # There should be an arrowhead from k and l to j + if (j, 0) not in adjtcont[k] and (j, 0) not \ + in adjtcont[l]: + # Check that i is adjacent to k and l + if (k, 0) in adjtcont[i]\ + and (l, 0) in adjtcont[i]: + # Check that not both have arrow + # towards i + if (i, 0) in adjtcont[k] \ + or (i, 0) in adjtcont[l]: + # k and l should not be adjacent + if (graph[k, l, 0] == 0 + and graph[l, k, 0] == 0): + chains.append((((i, 0), k, j), + ((i, 0), l, j))) + + return chains + + def _pcalg_rules_timeseries(self, + graph, + ambiguous_triples, + conflict_resolution, + ): + """Implements the rule orientation step of the PC algorithm for + time series. + + Parameters + ---------- + graph : array of shape (N, N, tau_max+1) + Current graph. + ambiguous_triples : list + List of ambiguous triples, only relevant for 'majority' and + 'conservative' rules, see paper for details. + conflict_resolution : bool + Whether to mark conflicts in orientation rules. Only for True + this leads to an order-independent algorithm. + + Returns + ------- + graph : array of shape [N, N, tau_max+1] + Resulting causal graph, see description above for interpretation. + """ + N = graph.shape[0] + + def rule1(graph, oriented_links): + """Find (unambiguous) triples i_tau --> k_t o-o j_t with + i_tau -/- j_t and orient as i_tau --> k_t --> j_t. + """ + triples = self._find_triples_rule1(graph) + triples_left = False + + for itaukj in triples: + if itaukj not in ambiguous_triples: + triples_left = True + # Orient as i_tau --> k_t --> j_t + (i, tau), k, j = itaukj + if (j, k) not in oriented_links and ( + k, j) not in oriented_links: + if self.verbosity > 1: + print( + " R1: Found (%s % d) --> %s o-o %s, " + "orient as %s --> %s" % ( + self.var_names[i], tau, self.var_names[k], + self.var_names[j], + self.var_names[k], self.var_names[j])) + graph[j, k, 0] = 0 + oriented_links.append((k, j)) + + if conflict_resolution: + if (j, k) in oriented_links: + if self.verbosity > 1: + print( + " Conflict since %s <-- %s already" + " oriented: Mark link as `2` in graph" % ( + self.var_names[k], self.var_names[j])) + graph[j, k, 0] = graph[k, j, 0] = 2 + + return triples_left, graph, oriented_links + + def rule2(graph, oriented_links): + """Find (unambiguous) triples i_t --> k_t --> j_t with i_t o-o j_t + and orient as i_t --> j_t. 
+ """ + + triples = self._find_triples_rule2(graph) + triples_left = False + + for itaukj in triples: + if itaukj not in ambiguous_triples: + # TODO: CHeck whether this is actually needed + # since ambiguous triples are always unshielded and here + # we look for triples where i and j are connected + triples_left = True + # Orient as i_t --> j_t + (i, tau), k, j = itaukj + if (j, i) not in oriented_links and ( + i, j) not in oriented_links: + if self.verbosity > 1: + print( + " R2: Found %s --> %s --> %s with %s " + "o-o %s, orient as %s --> %s" % ( + self.var_names[i], self.var_names[k], + self.var_names[j], + self.var_names[i], self.var_names[j], + self.var_names[i], self.var_names[j])) + graph[j, i, 0] = 0 + oriented_links.append((i, j)) + if conflict_resolution: + if (j, i) in oriented_links: + if self.verbosity > 1: + print( + " Conflict since %s <-- %s already " + "oriented: Mark link as `2` in graph" % ( + self.var_names[i], self.var_names[j])) + graph[j, i, 0] = graph[i, j, 0] = 2 + + return triples_left, graph, oriented_links + + def rule3(graph, oriented_links): + """Find (unambiguous) chains i_t o-o k_t --> j_t + and i_t o-o l_t --> j_t with i_t o-o j_t + and k_t -/- l_t: Orient as i_t --> j_t. + """ + # First find all chains i_t -- k_t --> j_t with i_t -- j_t + # and k_t -/- l_t + chains = self._find_chains_rule3(graph) + + chains_left = False + + for (itaukj, itaulj) in chains: + if (itaukj not in ambiguous_triples and + itaulj not in ambiguous_triples): + # TODO: CHeck whether this is actually needed + # since ambiguous triples are always unshielded and here + # we look for triples where i and j are connected + chains_left = True + # Orient as i_t --> j_t + (i, tau), k, j = itaukj + _ , l, _ = itaulj + + if (j, i) not in oriented_links and ( + i, j) not in oriented_links: + if self.verbosity > 1: + print( + " R3: Found %s o-o %s --> %s and %s o-o " + "%s --> %s with %s o-o %s and %s -/- %s, " + "orient as %s --> %s" % ( + self.var_names[i], self.var_names[k], + self.var_names[j], self.var_names[i], + self.var_names[l], self.var_names[j], + self.var_names[i], self.var_names[j], + self.var_names[k], self.var_names[l], + self.var_names[i], self.var_names[j])) + graph[j, i, 0] = 0 + oriented_links.append((i, j)) + if conflict_resolution: + if (j, i) in oriented_links: + if self.verbosity > 1: + print( + " Conflict since %s <-- %s already " + "oriented: Mark link as `2` in graph" % ( + self.var_names[i], self.var_names[j])) + graph[j, i, 0] = graph[i, j, 0] = 2 + + return chains_left, graph, oriented_links + + if self.verbosity > 1: + print("\n") + print("----------------------------") + print("Rule orientation phase") + print("----------------------------") + + oriented_links = [] + graph_new = np.copy(graph) + any1 = any2 = any3 = True + while (any1 or any2 or any3): + if self.verbosity > 1: + print("\nTry rule(s) %s" % ( + np.where(np.array([0, any1, any2, any3])))) + any1, graph_new, oriented_links = rule1(graph_new, oriented_links) + any2, graph_new, oriented_links = rule2(graph_new, oriented_links) + any3, graph_new, oriented_links = rule3(graph_new, oriented_links) + + if self.verbosity > 1: + adjt = self._get_adj_time_series(graph_new) + print("\nUpdated adjacencies:") + self._print_parents(all_parents=adjt, val_min=None, pval_max=None) + + return graph_new + + def _get_simplicial_node(self, circle_cpdag, variable_order): + """Find simplicial nodes in circle component CPDAG. 
+ + A vertex V is simplicial if all vertices adjacent to V are also adjacent + to each other (form a clique). + + Parameters + ---------- + circle_cpdag : array of shape (N, N, tau_max+1) + Circle component of PCMCIplus graph. + variable_order : list of length N + Order of variables in which to search for simplicial nodes. + + Returns + ------- + (j, adj_j) or None + First found simplicial node and its adjacencies. + """ + + for j in variable_order: + adj_j = np.where(circle_cpdag[:,j,0] == "o-o")[0].tolist() + + # Make sure the node has any adjacencies + all_adjacent = len(adj_j) > 0 + + # If it has just one adjacency, it's also simplicial + if len(adj_j) == 1: + return (j, adj_j) + else: + for (var1, var2) in itertools.combinations(adj_j, 2): + if circle_cpdag[var1, var2, 0] == "": + all_adjacent = False + break + + if all_adjacent: + return (j, adj_j) + + return None + + def _get_dag_from_cpdag(self, cpdag_graph, variable_order): + """Yields one member of the Markov equivalence class of a CPDAG. + + Removes conflicting edges. + + Used in PCMCI to run model selection on the output of PCMCIplus in order + to, e.g., optimize pc_alpha. + + Based on Zhang 2008, Theorem 2 (simplified for CPDAGs): Let H be the + graph resulting from the following procedure applied to a CPDAG: + + Consider the circle component of the CPDAG (sub graph consisting of all + (o-o edges, i.e., only for contemporaneous links), CPDAG^C and turn into + a DAG with no unshielded colliders. Then (H is a member of the Markov + equivalence class of the CPDAG. + + We use the approach mentioned in Colombo and Maathuis (2015) Lemma 7.6: + First note that CPDAG^C is chordal, that is, any cycle of length four or + more has a chord, which is an edge joining two vertices that are not + adjacent in the cycle; see the proof of Lemma 4.1 of Zhang (2008b). Any + chordal graph with more than one vertex has two simplicial vertices, + that is, vertices V such that all vertices adjacent to V are also + adjacent to each other. We choose such a vertex V1 and orient any edges + incident to V1 into V1. Since V1 is simplicial, this does not create + unshielded colliders. We then remove V1 and these edges from the graph. + The resulting graph is again chordal and therefore again has at least + two simplicial vertices. Choose such a vertex V2 , and orient any edges + incident to V2 into V2. We continue this procedure until all edges are + oriented. The resulting ordering is called a perfect elimination scheme + for CPDAG^C. Then the combined graph with the directed edges already + contained in the CPDAG is returned. + + Parameters + ---------- + cpdag_graph : array of shape (N, N, tau_max+1) + Result of PCMCIplus, a CPDAG. + variable_order : list of length N + Order of variables in which to search for simplicial nodes. + + Returns + ------- + dag : array of shape (N, N, tau_max+1) + One member of the Markov equivalence class of the CPDAG. + """ + + # TODO: Check whether CPDAG is chordal + + # Initialize resulting MAG + dag = np.copy(cpdag_graph) + + # Turn circle component CPDAG^C into a DAG with no unshielded colliders. 
+ circle_cpdag = np.copy(cpdag_graph) + # All lagged links are directed by time, remove them here + circle_cpdag[:,:,1:] = "" + # Also remove conflicting links + circle_cpdag[circle_cpdag=="x-x"] = "" + # Find undirected links, remove directed links + for i, j, tau in zip(*np.where(circle_cpdag != "")): + if circle_cpdag[i,j,0] == "-->": + circle_cpdag[i,j,0] = "" + + # Iterate through simplicial nodes + simplicial_node = self._get_simplicial_node(circle_cpdag, + variable_order) + while simplicial_node is not None: + + # Choose such a vertex V1 and orient any edges incident to V1 into + # V1 in the MAG And remove V1 and these edges from the circle + # component PAG + (j, adj_j) = simplicial_node + for var in adj_j: + dag[var, j, 0] = "-->" + dag[j, var, 0] = "<--" + circle_cpdag[var, j, 0] = circle_cpdag[j, var, 0] = "" + + # Iterate + simplicial_node = self._get_simplicial_node(circle_cpdag, + variable_order) + + return dag + + def _optimize_pcmciplus_alpha(self, + selected_links, + tau_min, + tau_max, + pc_alpha, + contemp_collider_rule, + conflict_resolution, + reset_lagged_links, + max_conds_dim, + max_conds_py, + max_conds_px, + max_conds_px_lagged, + fdr_method, + ): + """Optimizes pc_alpha in PCMCIplus. + + If a list or None is passed for ``pc_alpha``, the significance level is + optimized for every graph across the given ``pc_alpha`` values using the + score computed in ``cond_ind_test.get_model_selection_criterion()`` + + Parameters + ---------- + See those for run_pcmciplus() + + Returns + ------- + Results for run_pcmciplus() for the optimal pc_alpha. + """ + + if pc_alpha is None: + pc_alpha_list = [0.001, 0.005, 0.01, 0.025, 0.05] + else: + pc_alpha_list = pc_alpha + + if self.verbosity > 0: + print("\n##\n## Optimizing pc_alpha over " + + "pc_alpha_list = %s" % str(pc_alpha_list) + + "\n##") + + results = {} + score = np.zeros_like(pc_alpha_list) + for iscore, pc_alpha_here in enumerate(pc_alpha_list): + # Print statement about the pc_alpha being tested + if self.verbosity > 0: + print("\n## pc_alpha = %s (%d/%d):" % (pc_alpha_here, + iscore + 1, + score.shape[0])) + # Get the results for this alpha value + results[pc_alpha_here] = \ + self.run_pcmciplus(selected_links=selected_links, + tau_min=tau_min, + tau_max=tau_max, + pc_alpha=pc_alpha_here, + contemp_collider_rule=contemp_collider_rule, + conflict_resolution=conflict_resolution, + reset_lagged_links=reset_lagged_links, + max_conds_dim=max_conds_dim, + max_conds_py=max_conds_py, + max_conds_px=max_conds_px, + max_conds_px_lagged=max_conds_px_lagged, + fdr_method=fdr_method) + + # Get one member of the Markov equivalence class of the result + # of PCMCIplus, which is a CPDAG + + # First create order that is based on some feature of the variables + # to avoid order-dependence of DAG, i.e., it should not matter + # in which order the variables appear in dataframe + # Here we use the sum of absolute val_matrix values incident at j + val_matrix = results[pc_alpha_here]['val_matrix'] + variable_order = np.argsort( + np.abs(val_matrix).sum(axis=(0,2)))[::-1] + + dag = self._get_dag_from_cpdag( + cpdag_graph=results[pc_alpha_here]['graph'], + variable_order=variable_order) + + + # Compute the best average score when the model selection + # is applied to all N variables + for j in range(self.N): + parents = [] + for i, tau in zip(*np.where(dag[:,j,:] == "-->")): + parents.append((i, -tau)) + score[iscore] += \ + self.cond_ind_test.get_model_selection_criterion( + j, parents, tau_max) + score[iscore] /= float(self.N) + + # Record 
the optimal alpha value + optimal_alpha = pc_alpha_list[score.argmin()] + + if self.verbosity > 0: + print("\n##"+ + "\n\n## Scores for individual pc_alpha values:\n") + for iscore, pc_alpha in enumerate(pc_alpha_list): + print(" pc_alpha = %7s yields score = %.5f" % (pc_alpha, + score[iscore])) + print("\n##\n## Results for optimal " + + "pc_alpha = %s\n##" % optimal_alpha) + self.print_results(results[optimal_alpha], alpha_level=optimal_alpha) + + optimal_results = results[optimal_alpha] + optimal_results['optimal_alpha'] = optimal_alpha + return optimal_results + +
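For orientation, a hedged sketch of how the optimization above is reached from the public API: passing a list (or None) for pc_alpha makes run_pcmciplus score each candidate with cond_ind_test.get_model_selection_criterion() and return the results for the best value. Data and candidate values below are illustrative.

    # Sketch: let PCMCIplus choose pc_alpha from a list of candidates.
    import numpy as np
    import tigramite.data_processing as pp
    from tigramite.pcmci import PCMCI
    from tigramite.independence_tests import ParCorr

    data = np.random.randn(1000, 3)
    pcmci = PCMCI(dataframe=pp.DataFrame(data), cond_ind_test=ParCorr())
    results = pcmci.run_pcmciplus(tau_min=0, tau_max=2,
                                  pc_alpha=[0.001, 0.01, 0.05])
    print(results['optimal_alpha'])                   # candidate with the lowest score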
[docs] def convert_to_string_graph(self, graph_bool): + """Converts the 0,1-based graph returned by PCMCI to a string array + with links '-->'. + + Parameters + ---------- + graph_bool : array + 0,1-based graph array output by PCMCI. + + Returns + ------- + graph : array + graph as string array with links '-->'. + """ + + graph = np.zeros(graph_bool.shape, dtype='<U3') + graph[:] = "" + # Lagged links + graph[:,:,1:][graph_bool[:,:,1:]==1] = "-->" + # Unoriented contemporaneous links + graph[:,:,0][np.logical_and(graph_bool[:,:,0]==1, + graph_bool[:,:,0].T==1)] = "o-o" + # Conflicting contemporaneous links + graph[:,:,0][np.logical_and(graph_bool[:,:,0]==2, + graph_bool[:,:,0].T==2)] = "x-x" + # Directed contemporaneous links + for (i,j) in zip(*np.where( + np.logical_and(graph_bool[:,:,0]==1, graph_bool[:,:,0].T==0))): + graph[i,j,0] = "-->" + graph[j,i,0] = "<--" + + return graph
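A small sketch of the conversion above; the toy 0/1 array is made up and pcmci is any PCMCI instance (e.g., as constructed in the earlier sketch).

    # Sketch: convert a 0/1 graph array into tigramite's string notation.
    import numpy as np
    graph_bool = np.zeros((2, 2, 2), dtype='int')
    graph_bool[0, 1, 1] = 1                           # lagged link 0 -> 1 at lag 1
    graph_bool[0, 1, 0] = graph_bool[1, 0, 0] = 1     # unoriented contemporaneous link
    graph_str = pcmci.convert_to_string_graph(graph_bool)
    # graph_str[0, 1, 1] == "-->";  graph_str[0, 1, 0] == graph_str[1, 0, 0] == "o-o"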
+ +
[docs] def symmetrize_p_and_val_matrix(self, p_matrix, val_matrix, selected_links, conf_matrix=None): + """Symmetrizes the p_matrix, val_matrix, and conf_matrix based on selected_links + and the larger p-value. + + Parameters + ---------- + val_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of test statistic values. + p_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of p-values. Set to 1 if val_only=True. + conf_matrix : array of shape [N, N, tau_max+1,2] + Estimated matrix of confidence intervals of test statistic values. + Only computed if set in cond_ind_test, where also the percentiles + are set. + selected_links : dict or None + Dictionary of form {0: [(3, -2), ...], 1:[], ...} + specifying whether only selected links should be tested. If None is + passed, all links are tested. + + Returns + ------- + val_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of test statistic values. + p_matrix : array of shape [N, N, tau_max+1] + Estimated matrix of p-values. Set to 1 if val_only=True. + conf_matrix : array of shape [N, N, tau_max+1,2] + Estimated matrix of confidence intervals of test statistic values. + Only computed if set in cond_ind_test, where also the percentiles + are set. + """ + + # Symmetrize p_matrix and val_matrix and conf_matrix + for i in range(self.N): + for j in range(self.N): + # If both the links are present in selected_links, symmetrize using maximum p-value + if ((i, 0) in selected_links[j] and (j, 0) in selected_links[i]): + if (p_matrix[i, j, 0] + >= p_matrix[j, i, 0]): + p_matrix[j, i, 0] = p_matrix[i, j, 0] + val_matrix[j, i, 0] = val_matrix[i, j, 0] + if conf_matrix is not None: + conf_matrix[j, i, 0] = conf_matrix[i, j, 0] + + # If only one of the links is present in selected_links, symmetrize using the p-value of the link present + elif ((i, 0) in selected_links[j] and (j, 0) not in selected_links[i]): + p_matrix[j, i, 0] = p_matrix[i, j, 0] + val_matrix[j, i, 0] = val_matrix[i, j, 0] + if conf_matrix is not None: + conf_matrix[j, i, 0] = conf_matrix[i, j, 0] + else: + # Links not present in selected_links + pass + + # Return the values as a dictionary and store in class + results = {'val_matrix': val_matrix, + 'p_matrix': p_matrix, + 'conf_matrix': conf_matrix} + return results
+ +
[docs] def run_sliding_window_of(self, method, method_args, + window_step, + window_length, + conf_lev = 0.95, + ): + """Runs chosen method on sliding windows taken from DataFrame. + + The function returns summary_results and all_results (containing the + individual window results). summary_results contains val_matrix_mean + and val_matrix_interval, the latter containing the confidence bounds for + conf_lev. If the method also returns a graph, then 'most_frequent_links' + containing the most frequent link outcome (either 0 or 1 or a specific + link type) in each entry of graph, as well as 'link_frequency', + containing the occurence frequency of the most frequent link outcome, + are returned. + + Parameters + ---------- + method : str + Chosen method among valid functions in PCMCI. + method_args : dict + Arguments passed to method. + window_step : int + Time step of windows. + window_length : int + Length of sliding window. + conf_lev : float, optional (default: 0.9) + Two-sided confidence interval for summary results. + + Returns + ------- + Dictionary of results for every sliding window. + """ + + valid_methods = ['run_pc_stable', + 'run_mci', + 'get_lagged_dependencies', + 'run_fullci', + 'run_bivci', + 'run_pcmci', + 'run_pcalg', + # 'run_pcalg_non_timeseries_data', + 'run_pcmciplus',] + + if method not in valid_methods: + raise ValueError("method must be one of %s" % str(valid_methods)) + + T = self.T + + if self.cond_ind_test.recycle_residuals: + # recycle_residuals clashes with sliding windows... + raise ValueError("cond_ind_test.recycle_residuals must be False.") + + if self.verbosity > 0: + print("\n##\n## Running sliding window analysis of %s " % method + + "\n##\n" + + "\nwindow_step = %s \n" % window_step + + "\nwindow_length = %s \n" % window_length + ) + + if self.dataframe.missing_flag is None: + self.dataframe.missing_flag = True + + original_data = deepcopy(self.dataframe.values) + + window_start_points = np.arange(0, T - window_length, window_step) + n_windows = len(window_start_points) + + window_results = {} + for iw, w in enumerate(window_start_points): + # Set values before and after window to np.nan + data_window = deepcopy(original_data) + data_window[0:w] = np.nan + data_window[w + window_length:] = np.nan + + self.dataframe.values = data_window + window_res = getattr(self, method)(**method_args) + + # Aggregate val_matrix and other arrays to new arrays with + # windows as first dimension. Lists and other objects + # are stored in dictionary + for key in window_res: + res_item = window_res[key] + if iw == 0: + if type(res_item) is np.ndarray: + window_results[key] = np.empty((n_windows,) + + res_item.shape, + dtype=res_item.dtype) + else: + window_results[key] = {} + + window_results[key][iw] = res_item + + # Generate summary results + summary_results = {} + + if 'graph' in window_results: + most_frequent_links, counts = scipy.stats.mode( + window_results['graph'], axis=0) + summary_results['most_frequent_links'] =\ + most_frequent_links[0] #.squeeze() + summary_results['link_frequency'] =\ + counts[0]/float(n_windows) + + # Confidence intervals for val_matrix; interval is two-sided + c_int = (1. - (1. - conf_lev)/2.) + summary_results['val_matrix_mean'] = np.mean( + window_results['val_matrix'], axis=0) + + summary_results['val_matrix_interval'] = np.stack(np.percentile( + window_results['val_matrix'], axis=0, + q = [100*(1. - c_int), 100*c_int]), axis=3) + + return {'summary_results': summary_results, + 'window_results': window_results}
+ + +if __name__ == '__main__': + from tigramite.independence_tests import ParCorr, CMIknn + import tigramite.data_processing as pp + from tigramite.toymodels import structural_causal_processes as toys + import tigramite.plotting as tp + + np.random.seed(43) + + # Example process to play around with + # Each key refers to a variable and the incoming links are supplied + # as a list of format [((var, -lag), coeff, function), ...] + def lin_f(x): return x + def nonlin_f(x): return (x + 5. * x ** 2 * np.exp(-x ** 2 / 20.)) + + links = {0: [((0, -1), 0.9, lin_f)], + 1: [((1, -1), 0.8, lin_f), ((0, -1), 0.5, lin_f)], + # 2: [((2, -1), 0.7, lin_f), ((1, 0), 0.1, lin_f)], + # 3: [((3, -1), 0.7, lin_f), ((2, 0), -0.1, lin_f)], + } + + data, nonstat = toys.structural_causal_process(links, + T=10000, seed=7) + + # Data must be array of shape (time, variables) + print(data.shape) + dataframe = pp.DataFrame(data) + cond_ind_test = ParCorr() + pcmci = PCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test, verbosity=1) + # results = pcmci.run_pcmciplus(tau_min=0, tau_max=2, pc_alpha=0.01) + # pcmci.print_results(results, alpha_level=0.01) + + + print(pcmci.run_sliding_window_of(method='run_pcmciplus', + method_args={'tau_min':0, 'tau_max':3, 'pc_alpha':0.01}, + window_step=500, + window_length=1000, + conf_lev = 0.95)['summary_results']) + +
+ +
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/_build/html/_modules/tigramite/plotting.html b/docs/_build/html/_modules/tigramite/plotting.html
new file mode 100644
index 00000000..ab7b6ab1
--- /dev/null
+++ b/docs/_build/html/_modules/tigramite/plotting.html
@@ -0,0 +1,3427 @@
+tigramite.plotting — Tigramite 5.0 documentation
+Source code for tigramite.plotting
+"""Tigramite plotting package."""
+
+# Author: Jakob Runge <jakobrunge@posteo.de>
+#
+# License: GNU General Public License v3.0
+
+import numpy as np
+import matplotlib
+from matplotlib.colors import ListedColormap
+import matplotlib.transforms as transforms
+from matplotlib import pyplot, ticker
+from matplotlib.ticker import FormatStrFormatter
+import matplotlib.patches as mpatches
+from matplotlib.collections import PatchCollection
+
+import sys
+from operator import sub
+import networkx as nx
+import tigramite.data_processing as pp
+from copy import deepcopy
+import matplotlib.path as mpath
+import matplotlib.patheffects as PathEffects
+
+# TODO: Add proper docstrings to internal functions...
+
+
+def _par_corr_trafo(cmi):
+    """Transformation of CMI to partial correlation scale."""
+
+    # Set negative values to small positive number
+    # (zero would be interpreted as non-significant in some functions)
+    if np.ndim(cmi) == 0:
+        if cmi < 0.0:
+            cmi = 1e-8
+    else:
+        cmi[cmi < 0.0] = 1e-8
+
+    return np.sqrt(1.0 - np.exp(-2.0 * cmi))
+
+
+def _par_corr_to_cmi(par_corr):
+    """Transformation of partial correlation to CMI scale."""
+
+    return -0.5 * np.log(1.0 - par_corr ** 2)
+
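Under the Gaussian relation I = -0.5 log(1 - rho^2), the two module-private helpers above are inverses of each other; a quick numerical round-trip with an illustrative value:

    # Round-trip check of the helpers above (rho = 0.6 is illustrative).
    rho = 0.6
    cmi = _par_corr_to_cmi(rho)         # -0.5 * log(1 - 0.36) ≈ 0.223
    rho_back = _par_corr_trafo(cmi)     # sqrt(1 - exp(-2 * 0.223)) ≈ 0.6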
+
+def _myround(x, base=5, round_mode="updown"):
+    """Rounds x to a float with precision base."""
+
+    if round_mode == "updown":
+        return base * round(float(x) / base)
+    elif round_mode == "down":
+        return base * np.floor(float(x) / base)
+    elif round_mode == "up":
+        return base * np.ceil(float(x) / base)
+
+    return base * round(float(x) / base)
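For reference, the three rounding modes of the helper above behave as follows (numbers are illustrative):

    # _myround(7.3, base=5)                      -> 5.0   (nearest multiple of 5)
    # _myround(7.3, base=5, round_mode="up")     -> 10.0  (next multiple up)
    # _myround(7.3, base=5, round_mode="down")   -> 5.0   (next multiple down)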
+
+
+def _make_nice_axes(ax, where=None, skip=2, color=None):
+    """Makes nice axes."""
+
+    if where is None:
+        where = ["left", "bottom"]
+    if color is None:
+        color = {"left": "black", "right": "black", "bottom": "black", "top": "black"}
+
+    if type(skip) == int:
+        skip_x = skip_y = skip
+    else:
+        skip_x = skip[0]
+        skip_y = skip[1]
+
+    for loc, spine in ax.spines.items():
+        if loc in where:
+            spine.set_position(("outward", 5))  # outward by 5 points
+            spine.set_color(color[loc])
+            if loc == "left" or loc == "right":
+                pyplot.setp(ax.get_yticklines(), color=color[loc])
+                pyplot.setp(ax.get_yticklabels(), color=color[loc])
+            if loc == "top" or loc == "bottom":
+                pyplot.setp(ax.get_xticklines(), color=color[loc])
+        elif loc in [
+            item for item in ["left", "bottom", "right", "top"] if item not in where
+        ]:
+            spine.set_color("none")  # don't draw spine
+
+        else:
+            raise ValueError("unknown spine location: %s" % loc)
+
+    # ax.xaxis.get_major_formatter().set_useOffset(False)
+
+    # turn off ticks where there is no spine
+    if "top" in where and "bottom" not in where:
+        ax.xaxis.set_ticks_position("top")
+        ax.set_xticks(ax.get_xticks()[::skip_x])
+    elif "bottom" in where:
+        ax.xaxis.set_ticks_position("bottom")
+        ax.set_xticks(ax.get_xticks()[::skip_x])
+    else:
+        ax.xaxis.set_ticks_position("none")
+        ax.xaxis.set_ticklabels([])
+    if "right" in where and "left" not in where:
+        ax.yaxis.set_ticks_position("right")
+        ax.set_yticks(ax.get_yticks()[::skip_y])
+    elif "left" in where:
+        ax.yaxis.set_ticks_position("left")
+        ax.set_yticks(ax.get_yticks()[::skip_y])
+    else:
+        ax.yaxis.set_ticks_position("none")
+        ax.yaxis.set_ticklabels([])
+
+    ax.patch.set_alpha(0.0)
+
+
+def _get_absmax(val_matrix):
+    """Get value at absolute maximum in lag function array.
+    For an (N, N, tau)-array this computes the lag of the absolute maximum
+    along the tau-axis and stores the (positive or negative) value in
+    the (N,N)-array absmax."""
+
+    absmax_indices = np.abs(val_matrix).argmax(axis=2)
+    i, j = np.indices(val_matrix.shape[:2])
+
+    return val_matrix[i, j, absmax_indices]
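A toy illustration of the helper above; the array entries are made up.

    # _get_absmax keeps, for each (i, j), the signed entry with the largest
    # absolute value along the lag axis.
    import numpy as np
    val_matrix = np.zeros((2, 2, 3))
    val_matrix[0, 1] = [0.1, -0.7, 0.3]    # abs-max at lag 1 -> -0.7
    val_matrix[1, 0] = [0.2,  0.0, 0.5]    # abs-max at lag 2 ->  0.5
    print(_get_absmax(val_matrix))         # [[ 0.  -0.7] [ 0.5  0. ]]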
+
+
+def _add_timeseries(
+    fig,
+    axes,
+    i,
+    time,
+    dataseries,
+    label,
+    use_mask=False,
+    mask=None,
+    missing_flag=None,
+    grey_masked_samples=False,
+    data_linewidth=1.0,
+    skip_ticks_data_x=1,
+    skip_ticks_data_y=1,
+    unit=None,
+    last=False,
+    time_label="",
+    label_fontsize=10,
+    color="black",
+    grey_alpha=1.0,
+):
+    """Adds a time series plot to an axis.
+    Plot of dataseries is added to axis. Allows for proper visualization of
+    masked data.
+
+    Parameters
+    ----------
+    fig : figure instance
+        Figure instance.
+    axes : axis instance
+        Either gridded axis object or single axis instance.
+    i : int
+        Index of axis in gridded axis object.
+    time : array
+        Timelabel array.
+    dataseries : array-like
+        One-dimensional data series array of variable.
+    missing_flag : number, optional (default: None)
+        Flag for missing values in dataframe. Dismisses all time slices of
+        samples where missing values occur in any variable and also flags
+        samples for all lags up to 2*tau_max. This avoids biases, see section on
+        masking in Supplement of [1]_.
+    label : str
+        Variable label.
+    use_mask : bool, optional (default: False)
+        Whether to use masked data.
+    mask : array-like, optional (default: None)
+        Data mask where True labels masked samples.
+    grey_masked_samples : {False, 'fill', 'data'}, optional (default: False)
+        Whether to mark masked samples by grey fills ('fill') or grey data
+        ('data').
+    data_linewidth : float, optional (default: 1.)
+        Linewidth.
+    skip_ticks_data_x : int, optional (default: 1)
+        Skip every other tickmark.
+    skip_ticks_data_y : int, optional (default: 1)
+        Skip every other tickmark.
+    unit : str, optional (default: None)
+        Units of variable.
+    last : bool, optional (default: False)
+        Specify whether this is the last panel, where the bottom axis is also
+        plotted.
+    time_label : str, optional (default: '')
+        Label of time axis.
+    label_fontsize : int, optional (default: 10)
+        Fontsize.
+    color : str, optional (default: black)
+        Line color.
+    grey_alpha : float, optional (default: 1.)
+        Opacity of line.
+    """
+
+    # axes[i].xaxis.get_major_formatter().set_useOffset(False)
+    try:
+        ax = axes[i]
+    except:
+        ax = axes
+
+    if missing_flag is not None:
+        dataseries_nomissing = np.ma.masked_where(
+            dataseries == missing_flag, dataseries
+        )
+    else:
+        dataseries_nomissing = np.ma.masked_where(
+            np.zeros(dataseries.shape), dataseries
+        )
+
+    if use_mask:
+
+        maskdata = np.ma.masked_where(mask, dataseries_nomissing)
+
+        if grey_masked_samples == "fill":
+            ax.fill_between(
+                time,
+                maskdata.min(),
+                maskdata.max(),
+                where=mask,
+                color="grey",
+                interpolate=True,
+                linewidth=0.0,
+                alpha=grey_alpha,
+            )
+        elif grey_masked_samples == "data":
+            ax.plot(
+                time,
+                dataseries_nomissing,
+                color="grey",
+                marker=".",
+                markersize=data_linewidth,
+                linewidth=data_linewidth,
+                clip_on=False,
+                alpha=grey_alpha,
+            )
+
+        ax.plot(
+            time,
+            maskdata,
+            color=color,
+            linewidth=data_linewidth,
+            marker=".",
+            markersize=data_linewidth,
+            clip_on=False,
+        )
+    else:
+        ax.plot(
+            time,
+            dataseries_nomissing,
+            color=color,
+            linewidth=data_linewidth,
+            clip_on=False,
+        )
+
+    if last:
+        _make_nice_axes(
+            ax, where=["left", "bottom"], skip=(skip_ticks_data_x, skip_ticks_data_y)
+        )
+        ax.set_xlabel(r"%s" % time_label, fontsize=label_fontsize)
+    else:
+        _make_nice_axes(ax, where=["left"], skip=(skip_ticks_data_x, skip_ticks_data_y))
+    # ax.get_xaxis().get_major_formatter().set_useOffset(False)
+
+    ax.xaxis.set_major_formatter(FormatStrFormatter("%.0f"))
+    ax.label_outer()
+
+    ax.set_xlim(time[0], time[-1])
+
+    trans = transforms.blended_transform_factory(fig.transFigure, ax.transAxes)
+    if unit:
+        ax.set_ylabel(r"%s [%s]" % (label, unit), fontsize=label_fontsize)
+    else:
+        ax.set_ylabel(r"%s" % (label), fontsize=label_fontsize)
+
+        # ax.text(.02, .5, r'%s [%s]' % (label, unit), fontsize=label_fontsize,
+        #         horizontalalignment='left', verticalalignment='center',
+        #         rotation=90, transform=trans)
+    # else:
+    #     ax.text(.02, .5, r'%s' % (label), fontsize=label_fontsize,
+    #             horizontalalignment='left', verticalalignment='center',
+    #             rotation=90, transform=trans)
+    pyplot.tight_layout()
+
+
+
[docs]def plot_timeseries( + dataframe=None, + save_name=None, + fig_axes=None, + figsize=None, + var_units=None, + time_label="time", + use_mask=False, + grey_masked_samples=False, + data_linewidth=1.0, + skip_ticks_data_x=1, + skip_ticks_data_y=2, + label_fontsize=12, +): + """Create and save figure of stacked panels with time series. + + Parameters + ---------- + dataframe : data object, optional + This is the Tigramite dataframe object. It has the attributes + dataframe.values yielding a np array of shape (observations T, + variables N) and optionally a mask of the same shape. + save_name : str, optional (default: None) + Name of figure file to save figure. If None, figure is shown in window. + fig_axes : subplots instance, optional (default: None) + Figure and axes instance. If None they are created as + fig, axes = pyplot.subplots(N,...) + figsize : tuple of floats, optional (default: None) + Figure size if new figure is created. If None, default pyplot figsize + is used. + var_units : list of str, optional (default: None) + Units of variables. + time_label : str, optional (default: '') + Label of time axis. + use_mask : bool, optional (default: False) + Whether to use masked data. + grey_masked_samples : bool, optional (default: False) + Whether to mark masked samples by grey fills ('fill') or grey data + ('data'). + data_linewidth : float, optional (default: 1.) + Linewidth. + skip_ticks_data_x : int, optional (default: 1) + Skip every other tickmark. + skip_ticks_data_y : int, optional (default: 2) + Skip every other tickmark. + label_fontsize : int, optional (default: 10) + Fontsize of variable labels. + """ + + # Read in all attributes from dataframe + data = dataframe.values + mask = dataframe.mask + var_names = dataframe.var_names + missing_flag = dataframe.missing_flag + datatime = dataframe.datatime + + T, N = data.shape + + if var_units is None: + var_units = ["" for i in range(N)] + + if fig_axes is None: + fig, axes = pyplot.subplots(N, sharex=True, figsize=figsize) + else: + fig, axes = fig_axes + + for i in range(N): + if mask is None: + mask_i = None + else: + mask_i = mask[:, i] + _add_timeseries( + fig=fig, + axes=axes, + i=i, + time=datatime, + dataseries=data[:, i], + label=var_names[i], + use_mask=use_mask, + mask=mask_i, + missing_flag=missing_flag, + grey_masked_samples=grey_masked_samples, + data_linewidth=data_linewidth, + skip_ticks_data_x=skip_ticks_data_x, + skip_ticks_data_y=skip_ticks_data_y, + unit=var_units[i], + last=(i == N - 1), + time_label=time_label, + label_fontsize=label_fontsize, + ) + + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.15, right=0.95, hspace=0.3) + pyplot.tight_layout() + + if save_name is not None: + fig.savefig(save_name) + else: + return fig, axes
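A hedged usage sketch for the function above; the data shape and variable names are illustrative.

    # Sketch: plot three random time series with plot_timeseries.
    import numpy as np
    import tigramite.data_processing as pp
    import tigramite.plotting as tp

    data = np.random.randn(200, 3)
    dataframe = pp.DataFrame(data, var_names=['X', 'Y', 'Z'])
    fig, axes = tp.plot_timeseries(dataframe=dataframe)   # returned when save_name is None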
+ + +
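For reference, a minimal usage sketch of plot_timeseries (editorial example, not part of the patch; the data and unit strings are made up):

    import numpy as np
    from tigramite.data_processing import DataFrame
    import tigramite.plotting as tp

    data = np.random.randn(500, 3)                         # toy (T, N) observations
    dataframe = DataFrame(data, var_names=['X', 'Y', 'Z'])
    # With save_name=None the figure and axes are returned instead of saved.
    fig, axes = tp.plot_timeseries(dataframe=dataframe, var_units=['K', 'hPa', 'mm'])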
+def plot_lagfuncs(val_matrix, name=None, setup_args={}, add_lagfunc_args={}):
+    """Wrapper helper function to plot lag functions.
+    Sets up the matrix object and plots the lag functions, see parameters in
+    setup_matrix and add_lagfuncs.
+
+    Parameters
+    ----------
+    val_matrix : array_like
+        Matrix of shape (N, N, tau_max+1) containing test statistic values.
+    name : str, optional (default: None)
+        File name. If None, figure is shown in window.
+    setup_args : dict
+        Arguments for setting up the lag function matrix, see doc of
+        setup_matrix.
+    add_lagfunc_args : dict
+        Arguments for adding a lag function matrix, see doc of add_lagfuncs.
+
+    Returns
+    -------
+    matrix : object
+        Further lag functions can be overlaid using the
+        matrix.add_lagfuncs(val_matrix) function.
+    """
+
+    N, N, tau_max_plusone = val_matrix.shape
+    tau_max = tau_max_plusone - 1
+
+    matrix = setup_matrix(N=N, tau_max=tau_max, **setup_args)
+    matrix.add_lagfuncs(val_matrix=val_matrix, **add_lagfunc_args)
+
+    if name is not None:
+        matrix.savefig(name=name)
+
+    return matrix
+ + +
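A minimal, illustrative call of the wrapper above (random values stand in for test statistics; not part of the patch):

    import numpy as np
    import tigramite.plotting as tp

    val_matrix = np.random.rand(3, 3, 6)        # shape (N, N, tau_max+1)
    matrix = tp.plot_lagfuncs(val_matrix, setup_args={'x_base': 1, 'y_base': 0.5})
    # Further lag functions can be overlaid before saving:
    matrix.add_lagfuncs(val_matrix=0.5 * val_matrix, color='red', label='second estimate')
    matrix.savefig('lagfuncs.png')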
[docs]class setup_matrix: + """Create matrix of lag function panels. + Class to setup figure object. The function add_lagfuncs(...) allows to plot + the val_matrix of shape (N, N, tau_max+1). Multiple lagfunctions can be + overlaid for comparison. + + Parameters + ---------- + N : int + Number of variables + tau_max : int + Maximum time lag. + var_names : list, optional (default: None) + List of variable names. If None, range(N) is used. + figsize : tuple of floats, optional (default: None) + Figure size if new figure is created. If None, default pyplot figsize + is used. + minimum : int, optional (default: -1) + Lower y-axis limit. + maximum : int, optional (default: 1) + Upper y-axis limit. + label_space_left : float, optional (default: 0.1) + Fraction of horizontal figure space to allocate left of plot for labels. + label_space_top : float, optional (default: 0.05) + Fraction of vertical figure space to allocate top of plot for labels. + legend_width : float, optional (default: 0.15) + Fraction of horizontal figure space to allocate right of plot for + legend. + x_base : float, optional (default: 1.) + x-tick intervals to show. + y_base : float, optional (default: .4) + y-tick intervals to show. + plot_gridlines : bool, optional (default: False) + Whether to show a grid. + lag_units : str, optional (default: '') + lag_array : array, optional (default: None) + Optional specification of lags overwriting np.arange(0, tau_max+1) + label_fontsize : int, optional (default: 10) + Fontsize of variable labels. + """ + + def __init__( + self, + N, + tau_max, + var_names=None, + figsize=None, + minimum=-1, + maximum=1, + label_space_left=0.1, + label_space_top=0.05, + legend_width=0.15, + legend_fontsize=10, + x_base=1.0, + y_base=0.5, + plot_gridlines=False, + lag_units="", + lag_array=None, + label_fontsize=10, + ): + + self.tau_max = tau_max + + self.labels = [] + self.lag_units = lag_units + # if lag_array is None: + # self.lag_array = np.arange(0, self.tau_max + 1) + # else: + self.lag_array = lag_array + if x_base is None: + self.x_base = 1 + else: + self.x_base = x_base + + self.legend_width = legend_width + self.legend_fontsize = legend_fontsize + + self.label_space_left = label_space_left + self.label_space_top = label_space_top + self.label_fontsize = label_fontsize + + self.fig = pyplot.figure(figsize=figsize) + + self.axes_dict = {} + + if var_names is None: + var_names = range(N) + + plot_index = 1 + for i in range(N): + for j in range(N): + self.axes_dict[(i, j)] = self.fig.add_subplot(N, N, plot_index) + # Plot process labels + if j == 0: + trans = transforms.blended_transform_factory( + self.fig.transFigure, self.axes_dict[(i, j)].transAxes + ) + self.axes_dict[(i, j)].text( + 0.01, + 0.5, + "%s" % str(var_names[i]), + fontsize=label_fontsize, + horizontalalignment="left", + verticalalignment="center", + transform=trans, + ) + if i == 0: + trans = transforms.blended_transform_factory( + self.axes_dict[(i, j)].transAxes, self.fig.transFigure + ) + self.axes_dict[(i, j)].text( + 0.5, + 0.99, + r"${\to}$ " + "%s" % str(var_names[j]), + fontsize=label_fontsize, + horizontalalignment="center", + verticalalignment="top", + transform=trans, + ) + + # Make nice axis + _make_nice_axes( + self.axes_dict[(i, j)], where=["left", "bottom"], skip=(1, 1) + ) + if x_base is not None: + self.axes_dict[(i, j)].xaxis.set_major_locator( + ticker.FixedLocator(np.arange(0, self.tau_max + 1, x_base)) + ) + if x_base / 2.0 % 1 == 0: + self.axes_dict[(i, j)].xaxis.set_minor_locator( + ticker.FixedLocator( 
+ np.arange(0, self.tau_max + 1, x_base / 2.0) + ) + ) + if y_base is not None: + self.axes_dict[(i, j)].yaxis.set_major_locator( + ticker.FixedLocator( + np.arange( + _myround(minimum, y_base, "down"), + _myround(maximum, y_base, "up") + y_base, + y_base, + ) + ) + ) + self.axes_dict[(i, j)].yaxis.set_minor_locator( + ticker.FixedLocator( + np.arange( + _myround(minimum, y_base, "down"), + _myround(maximum, y_base, "up") + y_base, + y_base / 2.0, + ) + ) + ) + + self.axes_dict[(i, j)].set_ylim( + _myround(minimum, y_base, "down"), + _myround(maximum, y_base, "up"), + ) + if j != 0: + self.axes_dict[(i, j)].get_yaxis().set_ticklabels([]) + self.axes_dict[(i, j)].set_xlim(0, self.tau_max) + if plot_gridlines: + self.axes_dict[(i, j)].grid( + True, + which="major", + color="black", + linestyle="dotted", + dashes=(1, 1), + linewidth=0.05, + zorder=-5, + ) + + plot_index += 1 + +
[docs] def add_lagfuncs( + self, + val_matrix, + sig_thres=None, + conf_matrix=None, + color="black", + label=None, + two_sided_thres=True, + marker=".", + markersize=5, + alpha=1.0, + ): + """Add lag function plot from val_matrix array. + + Parameters + ---------- + val_matrix : array_like + Matrix of shape (N, N, tau_max+1) containing test statistic values. + sig_thres : array-like, optional (default: None) + Matrix of significance thresholds. Must be of same shape as + val_matrix. + conf_matrix : array-like, optional (default: None) + Matrix of shape (, N, tau_max+1, 2) containing confidence bounds. + color : str, optional (default: 'black') + Line color. + label : str + Test statistic label. + two_sided_thres : bool, optional (default: True) + Whether to draw sig_thres for pos. and neg. values. + marker : matplotlib marker symbol, optional (default: '.') + Marker. + markersize : int, optional (default: 5) + Marker size. + alpha : float, optional (default: 1.) + Opacity. + """ + + if label is not None: + self.labels.append((label, color, marker, markersize, alpha)) + + for ij in list(self.axes_dict): + i = ij[0] + j = ij[1] + maskedres = np.copy(val_matrix[i, j, int(i == j) :]) + self.axes_dict[(i, j)].plot( + range(int(i == j), self.tau_max + 1), + maskedres, + linestyle="", + color=color, + marker=marker, + markersize=markersize, + alpha=alpha, + clip_on=False, + ) + if conf_matrix is not None: + maskedconfres = np.copy(conf_matrix[i, j, int(i == j) :]) + self.axes_dict[(i, j)].plot( + range(int(i == j), self.tau_max + 1), + maskedconfres[:, 0], + linestyle="", + color=color, + marker="_", + markersize=markersize - 2, + alpha=alpha, + clip_on=False, + ) + self.axes_dict[(i, j)].plot( + range(int(i == j), self.tau_max + 1), + maskedconfres[:, 1], + linestyle="", + color=color, + marker="_", + markersize=markersize - 2, + alpha=alpha, + clip_on=False, + ) + + self.axes_dict[(i, j)].plot( + range(int(i == j), self.tau_max + 1), + np.zeros(self.tau_max + 1 - int(i == j)), + color="black", + linestyle="dotted", + linewidth=0.1, + ) + + if sig_thres is not None: + maskedsigres = sig_thres[i, j, int(i == j) :] + + self.axes_dict[(i, j)].plot( + range(int(i == j), self.tau_max + 1), + maskedsigres, + color=color, + linestyle="solid", + linewidth=0.1, + alpha=alpha, + ) + if two_sided_thres: + self.axes_dict[(i, j)].plot( + range(int(i == j), self.tau_max + 1), + -sig_thres[i, j, int(i == j) :], + color=color, + linestyle="solid", + linewidth=0.1, + alpha=alpha, + )
+ # pyplot.tight_layout() + +
[docs] def savefig(self, name=None): + """Save matrix figure. + + Parameters + ---------- + name : str, optional (default: None) + File name. If None, figure is shown in window. + """ + + # Trick to plot legend + if len(self.labels) > 0: + axlegend = self.fig.add_subplot(111, frameon=False) + axlegend.spines["left"].set_color("none") + axlegend.spines["right"].set_color("none") + axlegend.spines["bottom"].set_color("none") + axlegend.spines["top"].set_color("none") + axlegend.set_xticks([]) + axlegend.set_yticks([]) + + # self.labels.append((label, color, marker, markersize, alpha)) + for item in self.labels: + label = item[0] + color = item[1] + marker = item[2] + markersize = item[3] + alpha = item[4] + + axlegend.plot( + [], + [], + linestyle="", + color=color, + marker=marker, + markersize=markersize, + label=label, + alpha=alpha, + ) + axlegend.legend( + loc="upper left", + ncol=1, + bbox_to_anchor=(1.05, 0.0, 0.1, 1.0), + borderaxespad=0, + fontsize=self.legend_fontsize, + ).draw_frame(False) + + self.fig.subplots_adjust( + left=self.label_space_left, + right=1.0 - self.legend_width, + top=1.0 - self.label_space_top, + hspace=0.35, + wspace=0.35, + ) + pyplot.figtext( + 0.5, + 0.01, + r"lag $\tau$ [%s]" % self.lag_units, + horizontalalignment="center", + fontsize=self.label_fontsize, + ) + else: + self.fig.subplots_adjust( + left=self.label_space_left, + right=0.95, + top=1.0 - self.label_space_top, + hspace=0.35, + wspace=0.35, + ) + pyplot.figtext( + 0.55, + 0.01, + r"lag $\tau$ [%s]" % self.lag_units, + horizontalalignment="center", + fontsize=self.label_fontsize, + ) + + if self.lag_array is not None: + assert self.lag_array.shape == np.arange(self.tau_max + 1).shape + for ij in list(self.axes_dict): + i = ij[0] + j = ij[1] + self.axes_dict[(i, j)].set_xticklabels(self.lag_array[:: self.x_base]) + + if name is not None: + self.fig.savefig(name) + else: + pyplot.show()
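To illustrate the class above when used directly, a sketch that also passes confidence bounds to add_lagfuncs (toy values, not from an actual analysis):

    import numpy as np
    import tigramite.plotting as tp

    N, tau_max = 2, 4
    vals = np.random.uniform(-0.5, 0.5, size=(N, N, tau_max + 1))
    conf = np.stack([vals - 0.1, vals + 0.1], axis=-1)     # shape (N, N, tau_max+1, 2)

    matrix = tp.setup_matrix(N=N, tau_max=tau_max, var_names=['X', 'Y'],
                             minimum=-1, maximum=1, y_base=0.5)
    matrix.add_lagfuncs(val_matrix=vals, conf_matrix=conf, label='toy estimate')
    matrix.savefig()                                       # shown in window since no name is given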
+ + +def _draw_network_with_curved_edges( + fig, + ax, + G, + pos, + node_rings, + node_labels, + node_label_size, + node_alpha=1.0, + standard_size=100, + node_aspect=None, + standard_cmap="OrRd", + standard_color_links='black', + standard_color_nodes='lightgrey', + log_sizes=False, + cmap_links="YlOrRd", + # cmap_links_edges="YlOrRd", + links_vmin=0.0, + links_vmax=1.0, + links_edges_vmin=0.0, + links_edges_vmax=1.0, + links_ticks=0.2, + links_edges_ticks=0.2, + link_label_fontsize=8, + arrowstyle="->, head_width=0.4, head_length=1", + arrowhead_size=3.0, + curved_radius=0.2, + label_fontsize=4, + label_fraction=0.5, + link_colorbar_label="link", + # link_edge_colorbar_label='link_edge', + inner_edge_curved=False, + inner_edge_style="solid", + network_lower_bound=0.2, + show_colorbar=True, + special_nodes=None, +): + """Function to draw a network from networkx graph instance. + Various attributes are used to specify the graph's properties. + This function is just a beta-template for now that can be further + customized. + """ + + from matplotlib.patches import FancyArrowPatch, Circle, Ellipse + + ax.spines["left"].set_color("none") + ax.spines["right"].set_color("none") + ax.spines["bottom"].set_color("none") + ax.spines["top"].set_color("none") + ax.set_xticks([]) + ax.set_yticks([]) + + N = len(G) + + # This fixes a positioning bug in matplotlib. + ax.scatter(0, 0, zorder=-10, alpha=0) + + def draw_edge( + ax, + u, + v, + d, + seen, + arrowstyle="->, head_width=0.4, head_length=1", + outer_edge=True, + ): + + # avoiding attribute error raised by changes in networkx + if hasattr(G, "node"): + # works with networkx 1.10 + n1 = G.node[u]["patch"] + n2 = G.node[v]["patch"] + else: + # works with networkx 2.4 + n1 = G.nodes[u]["patch"] + n2 = G.nodes[v]["patch"] + + # print("+++++++++++++++++++++++==cmap_links ", cmap_links) + if outer_edge: + rad = -1.0 * curved_radius + if cmap_links is not None: + facecolor = data_to_rgb_links.to_rgba(d["outer_edge_color"]) + else: + if d["outer_edge_color"] is not None: + facecolor = d["outer_edge_color"] + else: + facecolor = standard_color_links + + width = d["outer_edge_width"] + alpha = d["outer_edge_alpha"] + if (u, v) in seen: + rad = seen.get((u, v)) + rad = (rad + np.sign(rad) * 0.1) * -1.0 + arrowstyle = arrowstyle + # link_edge = d['outer_edge_edge'] + linestyle = d.get("outer_edge_style") + + if d.get("outer_edge_attribute", None) == "spurious": + facecolor = "grey" + + if d.get("outer_edge_type") in ["<-o", "<--", "<-x"]: + n1, n2 = n2, n1 + + if d.get("outer_edge_type") in [ + "o-o", + "o--", + "--o", + "---", + "x-x", + "x--", + "--x", + "o-x", + "x-o", + # "+->", + # "<-+", + ]: + arrowstyle = "-" + # linewidth = width*factor + elif d.get("outer_edge_type") == "<->": + arrowstyle = "<->, head_width=0.4, head_length=1" + # linewidth = width*factor + elif d.get("outer_edge_type") in ["o->", "-->", "<-o", "<--", "<-x", "x->", "+->", "<-+"]: + arrowstyle = "->, head_width=0.4, head_length=1" + + else: + rad = -1.0 * inner_edge_curved * curved_radius + if cmap_links is not None: + facecolor = data_to_rgb_links.to_rgba(d["inner_edge_color"]) + else: + if d["inner_edge_color"] is not None: + facecolor = d["inner_edge_color"] + else: + # print("HERE") + facecolor = standard_color_links + + width = d["inner_edge_width"] + alpha = d["inner_edge_alpha"] + + if d.get("inner_edge_attribute", None) == "spurious": + facecolor = "grey" + # print(d.get("inner_edge_type")) + if d.get("inner_edge_type") in ["<-o", "<--", "<-x", "<-+"]: + n1, n2 = n2, n1 + + 
if d.get("inner_edge_type") in [ + "o-o", + "o--", + "--o", + "---", + "x-x", + "x--", + "--x", + "o-x", + "x-o", + ]: + arrowstyle = "-" + elif d.get("inner_edge_type") == "<->": + arrowstyle = "<->, head_width=0.4, head_length=1" + elif d.get("inner_edge_type") in ["o->", "-->", "<-o", "<--", "<-x", "x->", "+->"]: + arrowstyle = "->, head_width=0.4, head_length=1" + + linestyle = d.get("inner_edge_style") + + coor1 = n1.center + coor2 = n2.center + + marker_size = width ** 2 + figuresize = fig.get_size_inches() + + # print("COLOR ", facecolor) + e_p = FancyArrowPatch( + coor1, + coor2, + arrowstyle=arrowstyle, + connectionstyle=f"arc3,rad={rad}", + mutation_scale=width, + lw=width / 2, + alpha=alpha, + linestyle=linestyle, + color=facecolor, + clip_on=False, + patchA=n1, + patchB=n2, + shrinkA=0, + shrinkB=0, + zorder=-1, + ) + + ax.add_artist(e_p) + path = e_p.get_path() + vertices = path.vertices.copy() + m, n = vertices.shape + + start = vertices[0] + end = vertices[-1] + + # This must be added to avoid rescaling of the plot, when no 'o' + # or 'x' is added to the graph. + ax.scatter(*start, zorder=-10, alpha=0) + + if outer_edge: + if d.get("outer_edge_type") in ["o->", "o--"]: + circle_marker_start = ax.scatter( + *start, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_start) + elif d.get("outer_edge_type") == "<-o": + circle_marker_end = ax.scatter( + *start, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("outer_edge_type") == "--o": + circle_marker_end = ax.scatter( + *end, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("outer_edge_type") in ["x--", "x->"]: + circle_marker_start = ax.scatter( + *start, + marker="X", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_start) + elif d.get("outer_edge_type") in ["+--", "+->"]: + circle_marker_start = ax.scatter( + *start, + marker="P", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_start) + elif d.get("outer_edge_type") == "<-x": + circle_marker_end = ax.scatter( + *start, + marker="X", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("outer_edge_type") == "<-+": + circle_marker_end = ax.scatter( + *start, + marker="P", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("outer_edge_type") == "--x": + circle_marker_end = ax.scatter( + *end, + marker="X", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("outer_edge_type") == "o-o": + circle_marker_start = ax.scatter( + *start, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_start) + circle_marker_end = ax.scatter( + *end, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("outer_edge_type") == "x-x": + circle_marker_start = ax.scatter( + *start, + marker="X", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_start) + circle_marker_end = ax.scatter( + *end, + marker="X", + 
s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("outer_edge_type") == "o-x": + circle_marker_start = ax.scatter( + *start, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_start) + circle_marker_end = ax.scatter( + *end, + marker="X", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("outer_edge_type") == "x-o": + circle_marker_start = ax.scatter( + *start, + marker="X", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_start) + circle_marker_end = ax.scatter( + *end, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + + else: + if d.get("inner_edge_type") in ["o->", "o--"]: + circle_marker_start = ax.scatter( + *start, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_start) + elif d.get("inner_edge_type") == "<-o": + circle_marker_end = ax.scatter( + *start, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("inner_edge_type") == "--o": + circle_marker_end = ax.scatter( + *end, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("inner_edge_type") in ["x--", "x->"]: + circle_marker_start = ax.scatter( + *start, + marker="X", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_start) + elif d.get("inner_edge_type") in ["+--", "+->"]: + circle_marker_start = ax.scatter( + *start, + marker="P", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_start) + elif d.get("inner_edge_type") == "<-x": + circle_marker_end = ax.scatter( + *start, + marker="X", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("inner_edge_type") == "<-+": + circle_marker_end = ax.scatter( + *start, + marker="P", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("inner_edge_type") == "--x": + circle_marker_end = ax.scatter( + *end, + marker="X", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("inner_edge_type") == "o-o": + circle_marker_start = ax.scatter( + *start, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_start) + circle_marker_end = ax.scatter( + *end, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("inner_edge_type") == "x-x": + circle_marker_start = ax.scatter( + *start, + marker="X", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_start) + circle_marker_end = ax.scatter( + *end, + marker="X", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("inner_edge_type") == "o-x": + circle_marker_start = ax.scatter( + *start, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + 
zorder=1, + ) + ax.add_collection(circle_marker_start) + circle_marker_end = ax.scatter( + *end, + marker="X", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + elif d.get("inner_edge_type") == "x-o": + circle_marker_start = ax.scatter( + *start, + marker="X", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_start) + circle_marker_end = ax.scatter( + *end, + marker="o", + s=marker_size, + facecolor="w", + edgecolor=facecolor, + zorder=1, + ) + ax.add_collection(circle_marker_end) + + if d["label"] is not None and outer_edge: + # Attach labels of lags + trans = None # patch.get_transform() + path = e_p.get_path() + verts = path.to_polygons(trans)[0] + if len(verts) > 2: + label_vert = verts[1, :] + l = d["label"] + string = str(l) + txt = ax.text( + label_vert[0], + label_vert[1], + string, + fontsize=link_label_fontsize, + verticalalignment="center", + horizontalalignment="center", + color="w", + zorder=1, + ) + txt.set_path_effects( + [PathEffects.withStroke(linewidth=2, foreground="k")] + ) + + return rad + + # Collect all edge weights to get color scale + all_links_weights = [] + all_links_edge_weights = [] + for (u, v, d) in G.edges(data=True): + if u != v: + if d["outer_edge"] and d["outer_edge_color"] is not None: + all_links_weights.append(d["outer_edge_color"]) + if d["inner_edge"] and d["inner_edge_color"] is not None: + all_links_weights.append(d["inner_edge_color"]) + + if cmap_links is not None and len(all_links_weights) > 0: + if links_vmin is None: + links_vmin = np.array(all_links_weights).min() + if links_vmax is None: + links_vmax = np.array(all_links_weights).max() + data_to_rgb_links = pyplot.cm.ScalarMappable( + norm=None, cmap=pyplot.get_cmap(cmap_links) + ) + data_to_rgb_links.set_array(np.array(all_links_weights)) + data_to_rgb_links.set_clim(vmin=links_vmin, vmax=links_vmax) + # Create colorbars for links + + # setup colorbar axes. 
+ if show_colorbar: + cax_e = pyplot.axes( + [ + 0.55, + ax.get_subplotspec().get_position(ax.figure).bounds[1] + 0.02, + 0.4, + 0.025 + (len(all_links_edge_weights) == 0) * 0.035, + ], + frameon=False, + ) + + cb_e = pyplot.colorbar( + data_to_rgb_links, cax=cax_e, orientation="horizontal" + ) + # try: + cb_e.set_ticks( + np.arange( + _myround(links_vmin, links_ticks, "down"), + _myround(links_vmax, links_ticks, "up") + links_ticks, + links_ticks, + ) + ) + # except: + # print('no ticks given') + + cb_e.outline.clear() + cax_e.set_xlabel( + link_colorbar_label, labelpad=1, fontsize=label_fontsize, zorder=-10 + ) + + ## + # Draw nodes + ## + node_sizes = np.zeros((len(node_rings), N)) + for ring in list(node_rings): # iterate through to get all node sizes + if node_rings[ring]["sizes"] is not None: + node_sizes[ring] = node_rings[ring]["sizes"] + + else: + node_sizes[ring] = standard_size + max_sizes = node_sizes.max(axis=1) + total_max_size = node_sizes.sum(axis=0).max() + node_sizes /= total_max_size + node_sizes *= standard_size + + def get_aspect(ax): + # Total figure size + figW, figH = ax.get_figure().get_size_inches() + # print(figW, figH) + # Axis size on figure + _, _, w, h = ax.get_position().bounds + # Ratio of display units + # print(w, h) + disp_ratio = (figH * h) / (figW * w) + # Ratio of data units + # Negative over negative because of the order of subtraction + data_ratio = sub(*ax.get_ylim()) / sub(*ax.get_xlim()) + # print(data_ratio, disp_ratio) + return disp_ratio / data_ratio + + if node_aspect is None: + node_aspect = get_aspect(ax) + + # start drawing the outer ring first... + for ring in list(node_rings)[::-1]: + # print ring + # dictionary of rings: {0:{'sizes':(N,)-array, 'color_array':(N,)-array + # or None, 'cmap':string, 'vmin':float or None, 'vmax':float or None}} + if node_rings[ring]["color_array"] is not None: + color_data = node_rings[ring]["color_array"] + if node_rings[ring]["vmin"] is not None: + vmin = node_rings[ring]["vmin"] + else: + vmin = node_rings[ring]["color_array"].min() + if node_rings[ring]["vmax"] is not None: + vmax = node_rings[ring]["vmax"] + else: + vmax = node_rings[ring]["color_array"].max() + if node_rings[ring]["cmap"] is not None: + cmap = node_rings[ring]["cmap"] + else: + cmap = standard_cmap + data_to_rgb = pyplot.cm.ScalarMappable( + norm=None, cmap=pyplot.get_cmap(cmap) + ) + data_to_rgb.set_array(color_data) + data_to_rgb.set_clim(vmin=vmin, vmax=vmax) + colors = [data_to_rgb.to_rgba(color_data[n]) for n in G] + + if node_rings[ring]["colorbar"]: + # Create colorbars for nodes + # cax_n = pyplot.axes([.8 + ring*0.11, + # ax.get_subplotspec().get_position(ax.figure).bounds[1]+0.05, 0.025, 0.35], frameon=False) # + # setup colorbar axes. + # setup colorbar axes. 
+ cax_n = pyplot.axes( + [ + 0.05, + ax.get_subplotspec().get_position(ax.figure).bounds[1] + 0.02 + ring * 0.11, + 0.4, + 0.025 + (len(node_rings) == 1) * 0.035, + ], + frameon=False, + ) + cb_n = pyplot.colorbar(data_to_rgb, cax=cax_n, orientation="horizontal") + # try: + cb_n.set_ticks( + np.arange( + _myround(vmin, node_rings[ring]["ticks"], "down"), + _myround(vmax, node_rings[ring]["ticks"], "up") + + node_rings[ring]["ticks"], + node_rings[ring]["ticks"], + ) + ) + # except: + # print ('no ticks given') + cb_n.outline.clear() + # cb_n.set_ticks() + cax_n.set_xlabel( + node_rings[ring]["label"], labelpad=1, fontsize=label_fontsize + ) + else: + colors = None + vmin = None + vmax = None + + for n in G: + if type(node_alpha) == dict: + alpha = node_alpha[n] + else: + alpha = 1.0 + + if special_nodes is not None: + if n in special_nodes: + color_here = special_nodes[n] + else: + color_here = 'grey' + else: + if colors is None: + color_here = standard_color_nodes + else: + color_here = colors[n] + + c = Ellipse( + pos[n], + width=node_sizes[: ring + 1].sum(axis=0)[n] * node_aspect, + height=node_sizes[: ring + 1].sum(axis=0)[n], + clip_on=False, + facecolor=color_here, + edgecolor=color_here, + zorder=-ring - 1, + ) + + # else: + # if special_nodes is not None and n in special_nodes: + # color_here = special_nodes[n] + # else: + # color_here = colors[n] + # c = Ellipse( + # pos[n], + # width=node_sizes[: ring + 1].sum(axis=0)[n] * node_aspect, + # height=node_sizes[: ring + 1].sum(axis=0)[n], + # clip_on=False, + # facecolor=colors[n], + # edgecolor=colors[n], + # zorder=-ring - 1, + # ) + + ax.add_patch(c) + + # avoiding attribute error raised by changes in networkx + if hasattr(G, "node"): + # works with networkx 1.10 + G.node[n]["patch"] = c + else: + # works with networkx 2.4 + G.nodes[n]["patch"] = c + + if ring == 0: + ax.text( + pos[n][0], + pos[n][1], + node_labels[n], + fontsize=node_label_size, + horizontalalignment="center", + verticalalignment="center", + alpha=1.0, + ) + + # Draw edges + seen = {} + for (u, v, d) in G.edges(data=True): + if d.get("no_links"): + d["inner_edge_alpha"] = 1e-8 + d["outer_edge_alpha"] = 1e-8 + if u != v: + if d["outer_edge"]: + seen[(u, v)] = draw_edge(ax, u, v, d, seen, arrowstyle, outer_edge=True) + if d["inner_edge"]: + seen[(u, v)] = draw_edge(ax, u, v, d, seen, outer_edge=False) + + pyplot.subplots_adjust(bottom=network_lower_bound) + + +
[docs]def plot_graph( + graph=None, + val_matrix=None, + var_names=None, + fig_ax=None, + figsize=None, + save_name=None, + link_colorbar_label="MCI", + node_colorbar_label="auto-MCI", + link_width=None, + link_attribute=None, + node_pos=None, + arrow_linewidth=10.0, + vmin_edges=-1, + vmax_edges=1.0, + edge_ticks=0.4, + cmap_edges="RdBu_r", + vmin_nodes=0, + vmax_nodes=1.0, + node_ticks=0.4, + cmap_nodes="OrRd", + node_size=0.3, + node_aspect=None, + arrowhead_size=20, + curved_radius=0.2, + label_fontsize=10, + alpha=1.0, + node_label_size=10, + link_label_fontsize=10, + lag_array=None, + network_lower_bound=0.2, + show_colorbar=True, + inner_edge_style="dashed", + link_matrix=None, + special_nodes=None, +): + """Creates a network plot. + + This is still in beta. The network is defined from links in graph. Nodes + denote variables, straight links contemporaneous dependencies and curved + arrows lagged dependencies. The node color denotes the maximal absolute + auto-dependency and the link color the value at the lag with maximal + absolute cross-dependency. The link label lists the lags with significant + dependency in order of absolute magnitude. The network can also be + plotted over a map drawn before on the same axis. Then the node positions + can be supplied in appropriate axis coordinates via node_pos. + + Parameters + ---------- + graph : string or bool array-like, optional (default: None) + Either string matrix providing graph or bool array providing only adjacencies + Must be of same shape as val_matrix. + val_matrix : array_like + Matrix of shape (N, N, tau_max+1) containing test statistic values. + var_names : list, optional (default: None) + List of variable names. If None, range(N) is used. + fig_ax : tuple of figure and axis object, optional (default: None) + Figure and axes instance. If None they are created. + figsize : tuple + Size of figure. + save_name : str, optional (default: None) + Name of figure file to save figure. If None, figure is shown in window. + link_colorbar_label : str, optional (default: 'MCI') + Test statistic label. + node_colorbar_label : str, optional (default: 'auto-MCI') + Test statistic label for auto-dependencies. + link_width : array-like, optional (default: None) + Array of val_matrix.shape specifying relative link width with maximum + given by arrow_linewidth. If None, all links have same width. + link_attribute : array-like, optional (default: None) + String array of val_matrix.shape specifying link attributes. + node_pos : dictionary, optional (default: None) + Dictionary of node positions in axis coordinates of form + node_pos = {'x':array of shape (N,), 'y':array of shape(N)}. These + coordinates could have been transformed before for basemap plots. + arrow_linewidth : float, optional (default: 30) + Linewidth. + vmin_edges : float, optional (default: -1) + Link colorbar scale lower bound. + vmax_edges : float, optional (default: 1) + Link colorbar scale upper bound. + edge_ticks : float, optional (default: 0.4) + Link tick mark interval. + cmap_edges : str, optional (default: 'RdBu_r') + Colormap for links. + vmin_nodes : float, optional (default: 0) + Node colorbar scale lower bound. + vmax_nodes : float, optional (default: 1) + Node colorbar scale upper bound. + node_ticks : float, optional (default: 0.4) + Node tick mark interval. + cmap_nodes : str, optional (default: 'OrRd') + Colormap for links. + node_size : int, optional (default: 0.3) + Node size. 
+ node_aspect : float, optional (default: None) + Ratio between the heigth and width of the varible nodes. + arrowhead_size : int, optional (default: 20) + Size of link arrow head. Passed on to FancyArrowPatch object. + curved_radius, float, optional (default: 0.2) + Curvature of links. Passed on to FancyArrowPatch object. + label_fontsize : int, optional (default: 10) + Fontsize of colorbar labels. + alpha : float, optional (default: 1.) + Opacity. + node_label_size : int, optional (default: 10) + Fontsize of node labels. + link_label_fontsize : int, optional (default: 6) + Fontsize of link labels. + lag_array : array, optional (default: None) + Optional specification of lags overwriting np.arange(0, tau_max+1) + network_lower_bound : float, optional (default: 0.2) + Fraction of vertical space below graph plot. + show_colorbar : bool + Whether to show colorbars for links and nodes. + """ + + if link_matrix is not None: + raise ValueError("link_matrix is deprecated and replaced by graph array" + " which is now returned by all methods.") + + if fig_ax is None: + fig = pyplot.figure(figsize=figsize) + ax = fig.add_subplot(111, frame_on=False) + else: + fig, ax = fig_ax + + graph = graph.squeeze() + + if graph.ndim == 4: + raise ValueError("Time series graph of shape (N,N,tau_max+1,tau_max+1) cannot be represented by plot_graph," + " use plot_time_series_graph instead.") + + if graph.ndim == 2: + # If a non-time series (N,N)-graph is given, insert a dummy dimension + graph = np.expand_dims(graph, axis = 2) + + if val_matrix is None: + no_coloring = True + cmap_edges = None + cmap_nodes = None + else: + no_coloring = False + + (graph, val_matrix, link_width, link_attribute) = _check_matrices( + graph, val_matrix, link_width, link_attribute) + + + N, N, dummy = graph.shape + tau_max = dummy - 1 + max_lag = tau_max + 1 + + if np.count_nonzero(graph != "") == np.count_nonzero( + np.diagonal(graph) != "" + ): + diagonal = True + else: + diagonal = False + + if np.count_nonzero(graph == "") == graph.size or diagonal: + graph[0, 1, 0] = "---" + no_links = True + else: + no_links = False + + if var_names is None: + var_names = range(N) + + # Define graph links by absolute maximum (positive or negative like for + # partial correlation) + # val_matrix[np.abs(val_matrix) < sig_thres] = 0. + + # Only draw link in one direction among contemp + # Remove lower triangle + link_matrix_upper = np.copy(graph) + link_matrix_upper[:, :, 0] = np.triu(link_matrix_upper[:, :, 0]) + + # net = _get_absmax(link_matrix != "") + net = np.any(link_matrix_upper != "", axis=2) + G = nx.DiGraph(net) + + # This handels Graphs with no links. 
+ # nx.draw(G, alpha=0, zorder=-10) + + node_color = list(np.zeros(N)) + # list of all strengths for color map + all_strengths = [] + # Add attributes, contemporaneous and lagged links are handled separately + for (u, v, dic) in G.edges(data=True): + dic["no_links"] = no_links + # average lagfunc for link u --> v ANDOR u -- v + if tau_max > 0: + # argmax of absolute maximum + argmax = np.abs(val_matrix[u, v][1:]).argmax() + 1 + else: + argmax = 0 + + if u != v: + # For contemp links masking or finite samples can lead to different + # values for u--v and v--u + # Here we use the maximum for the width and weight (=color) + # of the link + # Draw link if u--v OR v--u at lag 0 is nonzero + # dic['inner_edge'] = ((np.abs(val_matrix[u, v][0]) >= + # sig_thres[u, v][0]) or + # (np.abs(val_matrix[v, u][0]) >= + # sig_thres[v, u][0])) + dic["inner_edge"] = link_matrix_upper[u, v, 0] + dic["inner_edge_type"] = link_matrix_upper[u, v, 0] + dic["inner_edge_alpha"] = alpha + if no_coloring: + dic["inner_edge_color"] = None + else: + dic["inner_edge_color"] = val_matrix[u, v, 0] + # # value at argmax of average + # if np.abs(val_matrix[u, v][0] - val_matrix[v, u][0]) > .0001: + # print("Contemporaneous I(%d; %d)=%.3f != I(%d; %d)=%.3f" % ( + # u, v, val_matrix[u, v][0], v, u, val_matrix[v, u][0]) + + # " due to conditions, finite sample effects or " + # "masking, here edge color = " + # "larger (absolute) value.") + # dic['inner_edge_color'] = _get_absmax( + # np.array([[[val_matrix[u, v][0], + # val_matrix[v, u][0]]]])).squeeze() + + if link_width is None: + dic["inner_edge_width"] = arrow_linewidth + else: + dic["inner_edge_width"] = ( + link_width[u, v, 0] / link_width.max() * arrow_linewidth + ) + + if link_attribute is None: + dic["inner_edge_attribute"] = None + else: + dic["inner_edge_attribute"] = link_attribute[u, v, 0] + + # # fraction of nonzero values + dic["inner_edge_style"] = "solid" + # else: + # dic['inner_edge_style'] = link_style[ + # u, v, 0] + + all_strengths.append(dic["inner_edge_color"]) + + if tau_max > 0: + # True if ensemble mean at lags > 0 is nonzero + # dic['outer_edge'] = np.any( + # np.abs(val_matrix[u, v][1:]) >= sig_thres[u, v][1:]) + dic["outer_edge"] = np.any(link_matrix_upper[u, v, 1:] != "") + else: + dic["outer_edge"] = False + + dic["outer_edge_type"] = link_matrix_upper[u, v, argmax] + + dic["outer_edge_alpha"] = alpha + if link_width is None: + # fraction of nonzero values + dic["outer_edge_width"] = arrow_linewidth + else: + dic["outer_edge_width"] = ( + link_width[u, v, argmax] / link_width.max() * arrow_linewidth + ) + + if link_attribute is None: + # fraction of nonzero values + dic["outer_edge_attribute"] = None + else: + dic["outer_edge_attribute"] = link_attribute[u, v, argmax] + + # value at argmax of average + if no_coloring: + dic["outer_edge_color"] = None + else: + dic["outer_edge_color"] = val_matrix[u, v][argmax] + all_strengths.append(dic["outer_edge_color"]) + + # Sorted list of significant lags (only if robust wrt + # d['min_ensemble_frac']) + if tau_max > 0: + lags = np.abs(val_matrix[u, v][1:]).argsort()[::-1] + 1 + sig_lags = (np.where(link_matrix_upper[u, v, 1:] != "")[0] + 1).tolist() + else: + lags, sig_lags = [], [] + if lag_array is not None: + dic["label"] = str([lag_array[l] for l in lags if l in sig_lags])[1:-1] + else: + dic["label"] = str([l for l in lags if l in sig_lags])[1:-1] + else: + # Node color is max of average autodependency + if no_coloring: + node_color[u] = None + else: + node_color[u] = val_matrix[u, v][argmax] + 
dic["inner_edge_attribute"] = None + dic["outer_edge_attribute"] = None + + # dic['outer_edge_edge'] = False + # dic['outer_edge_edgecolor'] = None + # dic['inner_edge_edge'] = False + # dic['inner_edge_edgecolor'] = None + + if special_nodes is not None: + special_nodes_draw = {} + for node in special_nodes: + i, tau = node + if tau >= -tau_max: + special_nodes_draw[i] = special_nodes[node] + special_nodes = special_nodes_draw + + + # If no links are present, set value to zero + if len(all_strengths) == 0: + all_strengths = [0.0] + + if node_pos is None: + pos = nx.circular_layout(deepcopy(G)) + else: + pos = {} + for i in range(N): + pos[i] = (node_pos["x"][i], node_pos["y"][i]) + + if cmap_nodes is None: + node_color = None + + node_rings = { + 0: { + "sizes": None, + "color_array": node_color, + "cmap": cmap_nodes, + "vmin": vmin_nodes, + "vmax": vmax_nodes, + "ticks": node_ticks, + "label": node_colorbar_label, + "colorbar": show_colorbar, + } + } + + _draw_network_with_curved_edges( + fig=fig, + ax=ax, + G=deepcopy(G), + pos=pos, + # dictionary of rings: {0:{'sizes':(N,)-array, 'color_array':(N,)-array + # or None, 'cmap':string, + node_rings=node_rings, + # 'vmin':float or None, 'vmax':float or None, 'label':string or None}} + node_labels=var_names, + node_label_size=node_label_size, + node_alpha=alpha, + standard_size=node_size, + node_aspect=node_aspect, + standard_cmap="OrRd", + standard_color_nodes="lightgrey", + standard_color_links="black", + log_sizes=False, + cmap_links=cmap_edges, + links_vmin=vmin_edges, + links_vmax=vmax_edges, + links_ticks=edge_ticks, + # cmap_links_edges='YlOrRd', links_edges_vmin=-1., links_edges_vmax=1., + # links_edges_ticks=.2, link_edge_colorbar_label='link_edge', + arrowstyle="simple", + arrowhead_size=arrowhead_size, + curved_radius=curved_radius, + label_fontsize=label_fontsize, + link_label_fontsize=link_label_fontsize, + link_colorbar_label=link_colorbar_label, + network_lower_bound=network_lower_bound, + show_colorbar=show_colorbar, + # label_fraction=label_fraction, + special_nodes=special_nodes, + ) + + if save_name is not None: + pyplot.savefig(save_name, dpi=300) + else: + return fig, ax
+ + +def _reverse_patt(patt): + """Inverts a link pattern""" + + if patt == "": + return "" + + left_mark, middle_mark, right_mark = patt[0], patt[1], patt[2] + if left_mark == "<": + new_right_mark = ">" + else: + new_right_mark = left_mark + if right_mark == ">": + new_left_mark = "<" + else: + new_left_mark = right_mark + + return new_left_mark + middle_mark + new_right_mark + + # if patt in ['---', 'o--', '--o', 'o-o', '']: + # return patt[::-1] + # elif patt == '<->': + # return '<->' + # elif patt == 'o->': + # return '<-o' + # elif patt == '<-o': + # return 'o->' + # elif patt == '-->': + # return '<--' + # elif patt == '<--': + # return '-->' + + +def _check_matrices(graph, val_matrix, link_width, link_attribute): + + + if graph.dtype != "<U3": + # Transform to new graph data type U3 + old_matrix = np.copy(graph) + graph = np.zeros(old_matrix.shape, dtype="<U3") + graph[:] = "" + for i, j, tau in zip(*np.where(old_matrix)): + if tau == 0: + if old_matrix[j, i, 0] == 0: + graph[i, j, 0] = "-->" + graph[j, i, 0] = "<--" + else: + graph[i, j, 0] = "o-o" + graph[j, i, 0] = "o-o" + else: + graph[i, j, tau] = "-->" + elif graph.ndim == 4: + pass + else: + # print(graph[:,:,0]) + # Assert that graph has valid and consistent lag-zero entries + for i, j, tau in zip(*np.where(graph)): + if tau == 0: + if graph[i, j, 0] != _reverse_patt(graph[j, i, 0]): + raise ValueError( + "graph needs to have consistent lag-zero links (eg" + " graph[i,j,0]='-->' requires graph[j,i,0]='<--')" + ) + if ( + val_matrix is not None + and val_matrix[i, j, 0] != val_matrix[j, i, 0] + ): + raise ValueError("val_matrix needs to be symmetric for lag-zero") + if ( + link_width is not None + and link_width[i, j, 0] != link_width[j, i, 0] + ): + raise ValueError("link_width needs to be symmetric for lag-zero") + if ( + link_attribute is not None + and link_attribute[i, j, 0] != link_attribute[j, i, 0] + ): + raise ValueError( + "link_attribute needs to be symmetric for lag-zero" + ) + + if graph[i, j, tau] not in [ + "---", + "o--", + "--o", + "o-o", + "o->", + "<-o", + "-->", + "<--", + "<->", + "x-o", + "o-x", + "x--", + "--x", + "x->", + "<-x", + "x-x", + "<-+", + "+->", + ]: + raise ValueError("Invalid graph entry.") + + if val_matrix is None: + # if graph.ndim == 4: + # val_matrix = (graph != "").astype("int") + # else: + val_matrix = (graph != "").astype("int") + + if link_width is not None and not np.all(link_width >= 0.0): + raise ValueError("link_width must be non-negative") + + return graph, val_matrix, link_width, link_attribute + + +
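The reversal rule above mirrors the edge marks, which is what the lag-zero consistency check in _check_matrices relies on; a few expected values (these follow directly from the code):

    # graph[i, j, 0] must equal _reverse_patt(graph[j, i, 0])
    assert _reverse_patt("-->") == "<--"
    assert _reverse_patt("o->") == "<-o"
    assert _reverse_patt("x-o") == "o-x"
    assert _reverse_patt("<->") == "<->"
    assert _reverse_patt("o-o") == "o-o"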
[docs]def plot_time_series_graph( + graph=None, + val_matrix=None, + var_names=None, + fig_ax=None, + figsize=None, + link_colorbar_label="MCI", + save_name=None, + link_width=None, + link_attribute=None, + arrow_linewidth=8, + vmin_edges=-1, + vmax_edges=1.0, + edge_ticks=0.4, + cmap_edges="RdBu_r", + order=None, + node_size=0.1, + node_aspect=None, + arrowhead_size=20, + curved_radius=0.2, + label_fontsize=12, + alpha=1.0, + node_label_size=12, + label_space_left=0.1, + label_space_top=0.0, + network_lower_bound=0.2, + inner_edge_style="dashed", + link_matrix=None, + special_nodes=None, + # aux_graph=None, + standard_color_links='black', + standard_color_nodes='lightgrey', +): + """Creates a time series graph. + This is still in beta. The time series graph's links are colored by + val_matrix. + + Parameters + ---------- + graph : string or bool array-like, optional (default: None) + Either string matrix providing graph or bool array providing only adjacencies + Either of shape (N, N, tau_max + 1) or as auxiliary graph of dims + (N, N, tau_max+1, tau_max+1) describing auxADMG. + val_matrix : array_like + Matrix of shape (N, N, tau_max+1) containing test statistic values. + var_names : list, optional (default: None) + List of variable names. If None, range(N) is used. + fig_ax : tuple of figure and axis object, optional (default: None) + Figure and axes instance. If None they are created. + figsize : tuple + Size of figure. + save_name : str, optional (default: None) + Name of figure file to save figure. If None, figure is shown in window. + link_colorbar_label : str, optional (default: 'MCI') + Test statistic label. + link_width : array-like, optional (default: None) + Array of val_matrix.shape specifying relative link width with maximum + given by arrow_linewidth. If None, all links have same width. + order : list, optional (default: None) + order of variables from top to bottom. + arrow_linewidth : float, optional (default: 30) + Linewidth. + vmin_edges : float, optional (default: -1) + Link colorbar scale lower bound. + vmax_edges : float, optional (default: 1) + Link colorbar scale upper bound. + edge_ticks : float, optional (default: 0.4) + Link tick mark interval. + cmap_edges : str, optional (default: 'RdBu_r') + Colormap for links. + node_size : int, optional (default: 0.1) + Node size. + node_aspect : float, optional (default: None) + Ratio between the heigth and width of the varible nodes. + arrowhead_size : int, optional (default: 20) + Size of link arrow head. Passed on to FancyArrowPatch object. + curved_radius, float, optional (default: 0.2) + Curvature of links. Passed on to FancyArrowPatch object. + label_fontsize : int, optional (default: 10) + Fontsize of colorbar labels. + alpha : float, optional (default: 1.) + Opacity. + node_label_size : int, optional (default: 10) + Fontsize of node labels. + link_label_fontsize : int, optional (default: 6) + Fontsize of link labels. + label_space_left : float, optional (default: 0.1) + Fraction of horizontal figure space to allocate left of plot for labels. + label_space_top : float, optional (default: 0.) + Fraction of vertical figure space to allocate top of plot for labels. + network_lower_bound : float, optional (default: 0.2) + Fraction of vertical space below graph plot. + inner_edge_style : string, optional (default: 'dashed') + Style of inner_edge contemporaneous links. + special_nodes : dict + Dictionary of format {(i, -tau): 'blue', ...} to color special nodes. 
+ """ + + if link_matrix is not None: + raise ValueError("link_matrix is deprecated and replaced by graph array" + " which is now returned by all methods.") + + if fig_ax is None: + fig = pyplot.figure(figsize=figsize) + ax = fig.add_subplot(111, frame_on=False) + else: + fig, ax = fig_ax + + if val_matrix is None: + no_coloring = True + cmap_edges = None + else: + no_coloring = False + + (graph, val_matrix, link_width, link_attribute) = _check_matrices( + graph, val_matrix, link_width, link_attribute + ) + + if graph.ndim == 4: + N, N, dummy, _ = graph.shape + tau_max = dummy - 1 + max_lag = tau_max + 1 + else: + N, N, dummy = graph.shape + tau_max = dummy - 1 + max_lag = tau_max + 1 + + if np.count_nonzero(graph == "") == graph.size: + if graph.ndim == 4: + graph[0, 1, 0, 0] = "---" + else: + graph[0, 1, 0] = "---" + no_links = True + else: + no_links = False + + if var_names is None: + var_names = range(N) + + if order is None: + order = range(N) + + if set(order) != set(range(N)): + raise ValueError("order must be a permutation of range(N)") + + def translate(row, lag): + return row * max_lag + lag + + # Define graph links by absolute maximum (positive or negative like for + # partial correlation) + tsg = np.zeros((N * max_lag, N * max_lag)) + tsg_val = np.zeros((N * max_lag, N * max_lag)) + tsg_width = np.zeros((N * max_lag, N * max_lag)) + tsg_style = np.zeros((N * max_lag, N * max_lag), dtype=graph.dtype) + if link_attribute is not None: + tsg_attr = np.zeros((N * max_lag, N * max_lag), dtype=link_attribute.dtype) + + # Only draw link in one direction + # Remove lower triangle + if graph.ndim == 4: + for i, j, taui, tauj in np.column_stack(np.where(graph)): + tau = taui - tauj + if tau <= 0 and j <= i: + continue + # print(max_lag, (i, -taui), (j, -tauj), aux_graph[i, j, taui, tauj]) + # print(translate(i, max_lag - 1 - taui), translate(j, max_lag-1-tauj)) + tsg[translate(i, max_lag - 1 - taui), translate(j, max_lag-1-tauj)] = 1.0 + tsg_val[translate(i, max_lag - 1 - taui), translate(j, max_lag-1-tauj)] = 1. 
#val_matrix[i, j, tau] + tsg_style[translate(i, max_lag - 1 - taui), translate(j, max_lag-1-tauj)] = graph[i, j, taui, tauj] + if link_width is not None: + tsg_width[translate(i, max_lag - 1 - taui), translate(j, max_lag-1-tauj)] = arrow_linewidth + if link_attribute is not None: + tsg_attr[translate(i, max_lag - 1 - taui), translate(j, max_lag-1-tauj)] = 'spurious' + # print(tsg_style) + # print(tsg) + + else: + link_matrix_tsg = np.copy(graph) + link_matrix_tsg[:, :, 0] = np.triu(graph[:, :, 0]) + + for i, j, tau in np.column_stack(np.where(link_matrix_tsg)): + for t in range(max_lag): + if ( + 0 <= translate(i, t - tau) + and translate(i, t - tau) % max_lag <= translate(j, t) % max_lag + ): + + tsg[ + translate(i, t - tau), translate(j, t) + ] = 1.0 # val_matrix[i, j, tau] + tsg_val[translate(i, t - tau), translate(j, t)] = val_matrix[i, j, tau] + tsg_style[translate(i, t - tau), translate(j, t)] = graph[ + i, j, tau + ] + if link_width is not None: + tsg_width[translate(i, t - tau), translate(j, t)] = ( + link_width[i, j, tau] / link_width.max() * arrow_linewidth + ) + if link_attribute is not None: + tsg_attr[translate(i, t - tau), translate(j, t)] = link_attribute[ + i, j, tau + ] + + + G = nx.DiGraph(tsg) + + if special_nodes is not None: + special_nodes_tsg = {} + for node in special_nodes: + i, tau = node + if tau >= -tau_max: + special_nodes_tsg[translate(i, max_lag-1 + tau)] = special_nodes[node] + + special_nodes = special_nodes_tsg + + # node_color = np.zeros(N) + # list of all strengths for color map + all_strengths = [] + # Add attributes, contemporaneous and lagged links are handled separately + for (u, v, dic) in G.edges(data=True): + dic["no_links"] = no_links + if u != v: + dic["inner_edge"] = False + dic["outer_edge"] = True + + dic["outer_edge_type"] = tsg_style[u, v] + + dic["outer_edge_alpha"] = alpha + + if link_width is None: + # fraction of nonzero values + dic["outer_edge_width"] = dic["inner_edge_width"] = arrow_linewidth + else: + dic["outer_edge_width"] = dic["inner_edge_width"] = tsg_width[u, v] + + if link_attribute is None: + dic["outer_edge_attribute"] = None + else: + dic["outer_edge_attribute"] = tsg_attr[u, v] + + # value at argmax of average + if no_coloring: + dic["outer_edge_color"] = None + else: + dic["outer_edge_color"] = tsg_val[u, v] + + all_strengths.append(dic["outer_edge_color"]) + dic["label"] = None + # print(u, v, dic) + + # If no links are present, set value to zero + if len(all_strengths) == 0: + all_strengths = [0.0] + + posarray = np.zeros((N * max_lag, 2)) + for i in range(N * max_lag): + posarray[i] = np.array([(i % max_lag), (1.0 - i // max_lag)]) + + pos_tmp = {} + for i in range(N * max_lag): + # for n in range(N): + # for tau in range(max_lag): + # i = n*N + tau + pos_tmp[i] = np.array( + [ + ((i % max_lag) - posarray.min(axis=0)[0]) + / (posarray.max(axis=0)[0] - posarray.min(axis=0)[0]), + ((1.0 - i // max_lag) - posarray.min(axis=0)[1]) + / (posarray.max(axis=0)[1] - posarray.min(axis=0)[1]), + ] + ) + pos_tmp[i][np.isnan(pos_tmp[i])] = 0.0 + + pos = {} + for n in range(N): + for tau in range(max_lag): + pos[n * max_lag + tau] = pos_tmp[order[n] * max_lag + tau] + + node_rings = { + 0: {"sizes": None, "color_array": None, "label": "", "colorbar": False,} + } + + node_labels = ["" for i in range(N * max_lag)] + + if graph.ndim == 4: + show_colorbar = False + else: + show_colorbar = True + + _draw_network_with_curved_edges( + fig=fig, + ax=ax, + G=deepcopy(G), + pos=pos, + node_rings=node_rings, + node_labels=node_labels, + 
node_label_size=node_label_size, + node_alpha=alpha, + standard_size=node_size, + node_aspect=node_aspect, + standard_cmap="OrRd", + standard_color_nodes=standard_color_nodes, + standard_color_links=standard_color_links, + log_sizes=False, + cmap_links=cmap_edges, + links_vmin=vmin_edges, + links_vmax=vmax_edges, + links_ticks=edge_ticks, + arrowstyle="simple", + arrowhead_size=arrowhead_size, + curved_radius=curved_radius, + label_fontsize=label_fontsize, + label_fraction=0.5, + link_colorbar_label=link_colorbar_label, + inner_edge_curved=True, + network_lower_bound=network_lower_bound, + inner_edge_style=inner_edge_style, + special_nodes=special_nodes, + show_colorbar=show_colorbar, + ) + + for i in range(N): + trans = transforms.blended_transform_factory(fig.transFigure, ax.transData) + ax.text( + label_space_left, + pos[order[i] * max_lag][1], + f"{var_names[order[i]]}", + fontsize=label_fontsize, + horizontalalignment="left", + verticalalignment="center", + transform=trans, + ) + + for tau in np.arange(max_lag - 1, -1, -1): + trans = transforms.blended_transform_factory(ax.transData, fig.transFigure) + if tau == max_lag - 1: + ax.text( + pos[tau][0], + 1.0 - label_space_top, + r"$t$", + fontsize=int(label_fontsize * 0.8), + horizontalalignment="center", + verticalalignment="top", + transform=trans, + ) + else: + ax.text( + pos[tau][0], + 1.0 - label_space_top, + r"$t-%s$" % str(max_lag - tau - 1), + fontsize=int(label_fontsize * 0.8), + horizontalalignment="center", + verticalalignment="top", + transform=trans, + ) + + if save_name is not None: + pyplot.savefig(save_name, dpi=300) + else: + return fig, ax
+ + +
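An illustrative call of plot_time_series_graph with a small hand-built graph (val_matrix is omitted, so links are drawn without coloring; values are made up):

    import numpy as np
    import tigramite.plotting as tp

    N, tau_max = 2, 2
    graph = np.full((N, N, tau_max + 1), "", dtype='<U3')
    graph[0, 0, 1] = "-->"          # autodependency X_{t-1} --> X_t
    graph[0, 1, 1] = "-->"          # lagged link X_{t-1} --> Y_t
    fig, ax = tp.plot_time_series_graph(graph=graph, var_names=['X', 'Y'])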
[docs]def plot_mediation_time_series_graph( + path_node_array, + tsg_path_val_matrix, + var_names=None, + fig_ax=None, + figsize=None, + link_colorbar_label="link coeff. (edge color)", + node_colorbar_label="MCE (node color)", + save_name=None, + link_width=None, + arrow_linewidth=8, + vmin_edges=-1, + vmax_edges=1.0, + edge_ticks=0.4, + cmap_edges="RdBu_r", + order=None, + vmin_nodes=-1.0, + vmax_nodes=1.0, + node_ticks=0.4, + cmap_nodes="RdBu_r", + node_size=0.1, + node_aspect=None, + arrowhead_size=20, + curved_radius=0.2, + label_fontsize=12, + alpha=1.0, + node_label_size=12, + label_space_left=0.1, + label_space_top=0.0, + network_lower_bound=0.2, + standard_color_links='black', + standard_color_nodes='lightgrey', +): + """Creates a mediation time series graph plot. + This is still in beta. The time series graph's links are colored by + val_matrix. + + Parameters + ---------- + tsg_path_val_matrix : array_like + Matrix of shape (N*tau_max, N*tau_max) containing link weight values. + path_node_array: array_like + Array of shape (N,) containing node values. + var_names : list, optional (default: None) + List of variable names. If None, range(N) is used. + fig_ax : tuple of figure and axis object, optional (default: None) + Figure and axes instance. If None they are created. + figsize : tuple + Size of figure. + save_name : str, optional (default: None) + Name of figure file to save figure. If None, figure is shown in window. + link_colorbar_label : str, optional (default: 'link coeff. (edge color)') + Link colorbar label. + node_colorbar_label : str, optional (default: 'MCE (node color)') + Node colorbar label. + link_width : array-like, optional (default: None) + Array of val_matrix.shape specifying relative link width with maximum + given by arrow_linewidth. If None, all links have same width. + order : list, optional (default: None) + order of variables from top to bottom. + arrow_linewidth : float, optional (default: 30) + Linewidth. + vmin_edges : float, optional (default: -1) + Link colorbar scale lower bound. + vmax_edges : float, optional (default: 1) + Link colorbar scale upper bound. + edge_ticks : float, optional (default: 0.4) + Link tick mark interval. + cmap_edges : str, optional (default: 'RdBu_r') + Colormap for links. + vmin_nodes : float, optional (default: 0) + Node colorbar scale lower bound. + vmax_nodes : float, optional (default: 1) + Node colorbar scale upper bound. + node_ticks : float, optional (default: 0.4) + Node tick mark interval. + cmap_nodes : str, optional (default: 'OrRd') + Colormap for links. + node_size : int, optional (default: 0.1) + Node size. + node_aspect : float, optional (default: None) + Ratio between the heigth and width of the varible nodes. + arrowhead_size : int, optional (default: 20) + Size of link arrow head. Passed on to FancyArrowPatch object. + curved_radius, float, optional (default: 0.2) + Curvature of links. Passed on to FancyArrowPatch object. + label_fontsize : int, optional (default: 10) + Fontsize of colorbar labels. + alpha : float, optional (default: 1.) + Opacity. + node_label_size : int, optional (default: 10) + Fontsize of node labels. + link_label_fontsize : int, optional (default: 6) + Fontsize of link labels. + label_space_left : float, optional (default: 0.1) + Fraction of horizontal figure space to allocate left of plot for labels. + label_space_top : float, optional (default: 0.) + Fraction of vertical figure space to allocate top of plot for labels. 
+ network_lower_bound : float, optional (default: 0.2) + Fraction of vertical space below graph plot. + """ + N = len(path_node_array) + Nmaxlag = tsg_path_val_matrix.shape[0] + max_lag = Nmaxlag // N + + if var_names is None: + var_names = range(N) + + if fig_ax is None: + fig = pyplot.figure(figsize=figsize) + ax = fig.add_subplot(111, frame_on=False) + else: + fig, ax = fig_ax + + if link_width is not None and not np.all(link_width >= 0.0): + raise ValueError("link_width must be non-negative") + + if order is None: + order = range(N) + + if set(order) != set(range(N)): + raise ValueError("order must be a permutation of range(N)") + + def translate(row, lag): + return row * max_lag + lag + + if np.count_nonzero(tsg_path_val_matrix) == np.count_nonzero( + np.diagonal(tsg_path_val_matrix) + ): + diagonal = True + else: + diagonal = False + + if np.count_nonzero(tsg_path_val_matrix) == tsg_path_val_matrix.size or diagonal: + tsg_path_val_matrix[0, 1] = 1 + no_links = True + else: + no_links = False + + # Define graph links by absolute maximum (positive or negative like for + # partial correlation) + tsg = tsg_path_val_matrix + tsg_attr = np.zeros((N * max_lag, N * max_lag)) + + G = nx.DiGraph(tsg) + + # node_color = np.zeros(N) + # list of all strengths for color map + all_strengths = [] + # Add attributes, contemporaneous and lagged links are handled separately + for (u, v, dic) in G.edges(data=True): + dic["no_links"] = no_links + dic["outer_edge_attribute"] = None + + if u != v: + + if u % max_lag == v % max_lag: + dic["inner_edge"] = True + dic["outer_edge"] = False + else: + dic["inner_edge"] = False + dic["outer_edge"] = True + + dic["inner_edge_alpha"] = alpha + dic["inner_edge_color"] = _get_absmax( + np.array([[[tsg[u, v], tsg[v, u]]]]) + ).squeeze() + dic["inner_edge_width"] = arrow_linewidth + all_strengths.append(dic["inner_edge_color"]) + + dic["outer_edge_alpha"] = alpha + + dic["outer_edge_width"] = arrow_linewidth + + # value at argmax of average + dic["outer_edge_color"] = tsg[u, v] + all_strengths.append(dic["outer_edge_color"]) + dic["label"] = None + + # dic['outer_edge_edge'] = False + # dic['outer_edge_edgecolor'] = None + # dic['inner_edge_edge'] = False + # dic['inner_edge_edgecolor'] = None + + # If no links are present, set value to zero + if len(all_strengths) == 0: + all_strengths = [0.0] + + posarray = np.zeros((N * max_lag, 2)) + for i in range(N * max_lag): + posarray[i] = np.array([(i % max_lag), (1.0 - i // max_lag)]) + + pos_tmp = {} + for i in range(N * max_lag): + # for n in range(N): + # for tau in range(max_lag): + # i = n*N + tau + pos_tmp[i] = np.array( + [ + ((i % max_lag) - posarray.min(axis=0)[0]) + / (posarray.max(axis=0)[0] - posarray.min(axis=0)[0]), + ((1.0 - i // max_lag) - posarray.min(axis=0)[1]) + / (posarray.max(axis=0)[1] - posarray.min(axis=0)[1]), + ] + ) + pos_tmp[i][np.isnan(pos_tmp[i])] = 0.0 + + pos = {} + for n in range(N): + for tau in range(max_lag): + pos[n * max_lag + tau] = pos_tmp[order[n] * max_lag + tau] + + node_color = np.zeros(N * max_lag) + for inet, n in enumerate(range(0, N * max_lag, max_lag)): + node_color[n : n + max_lag] = path_node_array[inet] + + # node_rings = {0: {'sizes': None, 'color_array': color_array, + # 'label': '', 'colorbar': False, + # } + # } + + node_rings = { + 0: { + "sizes": None, + "color_array": node_color, + "cmap": cmap_nodes, + "vmin": vmin_nodes, + "vmax": vmax_nodes, + "ticks": node_ticks, + "label": node_colorbar_label, + "colorbar": True, + } + } + + # ] for v in range(max_lag)] + 
node_labels = ["" for i in range(N * max_lag)] + + _draw_network_with_curved_edges( + fig=fig, + ax=ax, + G=deepcopy(G), + pos=pos, + # dictionary of rings: {0:{'sizes':(N,)-array, 'color_array':(N,)-array + # or None, 'cmap':string, + node_rings=node_rings, + # 'vmin':float or None, 'vmax':float or None, 'label':string or None}} + node_labels=node_labels, + node_label_size=node_label_size, + node_alpha=alpha, + standard_size=node_size, + node_aspect=node_aspect, + standard_cmap="OrRd", + standard_color_nodes=standard_color_nodes, + standard_color_links=standard_color_links, + log_sizes=False, + cmap_links=cmap_edges, + links_vmin=vmin_edges, + links_vmax=vmax_edges, + links_ticks=edge_ticks, + # cmap_links_edges='YlOrRd', links_edges_vmin=-1., links_edges_vmax=1., + # links_edges_ticks=.2, link_edge_colorbar_label='link_edge', + arrowhead_size=arrowhead_size, + curved_radius=curved_radius, + label_fontsize=label_fontsize, + label_fraction=0.5, + link_colorbar_label=link_colorbar_label, + inner_edge_curved=True, + network_lower_bound=network_lower_bound + # inner_edge_style=inner_edge_style + ) + + for i in range(N): + trans = transforms.blended_transform_factory(fig.transFigure, ax.transData) + ax.text( + label_space_left, + pos[order[i] * max_lag][1], + "%s" % str(var_names[order[i]]), + fontsize=label_fontsize, + horizontalalignment="left", + verticalalignment="center", + transform=trans, + ) + + for tau in np.arange(max_lag - 1, -1, -1): + trans = transforms.blended_transform_factory(ax.transData, fig.transFigure) + if tau == max_lag - 1: + ax.text( + pos[tau][0], + 1.0 - label_space_top, + r"$t$", + fontsize=label_fontsize, + horizontalalignment="center", + verticalalignment="top", + transform=trans, + ) + else: + ax.text( + pos[tau][0], + 1.0 - label_space_top, + r"$t-%s$" % str(max_lag - tau - 1), + fontsize=label_fontsize, + horizontalalignment="center", + verticalalignment="top", + transform=trans, + ) + + # fig.subplots_adjust(left=0.1, right=.98, bottom=.25, top=.9) + # savestring = os.path.expanduser(save_name) + if save_name is not None: + pyplot.savefig(save_name) + else: + pyplot.show()
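A minimal usage sketch for plot_mediation_time_series_graph (illustrative, not part of the patch; the import path tigramite.plotting and all array values are assumptions). It builds the two required inputs with the shapes stated in the docstring, using the node index convention var * max_lag + lag from the function body:

import numpy as np
from tigramite import plotting as tp

# Two variables with lags 0..2: the time series graph has N * max_lag = 6
# nodes, indexed as var * max_lag + lag, where lag index max_lag - 1 is time t.
N, max_lag = 2, 3
tsg = np.zeros((N * max_lag, N * max_lag))
tsg[0 * max_lag + 1, 1 * max_lag + 2] = 0.6   # X0 at t-1 --> X1 at t
tsg[0 * max_lag + 0, 0 * max_lag + 1] = 0.4   # X0 at t-2 --> X0 at t-1

# One mediated causal effect (MCE) value per variable, used to color nodes.
path_node_array = np.array([0.5, 0.3])

tp.plot_mediation_time_series_graph(
    path_node_array=path_node_array,
    tsg_path_val_matrix=tsg,
    var_names=["X0", "X1"],
    save_name=None,   # None shows the figure; a file name saves it instead
)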
+ + +
[docs]def plot_mediation_graph( + path_val_matrix, + path_node_array=None, + var_names=None, + fig_ax=None, + figsize=None, + save_name=None, + link_colorbar_label="link coeff. (edge color)", + node_colorbar_label="MCE (node color)", + link_width=None, + node_pos=None, + arrow_linewidth=10.0, + vmin_edges=-1, + vmax_edges=1.0, + edge_ticks=0.4, + cmap_edges="RdBu_r", + vmin_nodes=-1.0, + vmax_nodes=1.0, + node_ticks=0.4, + cmap_nodes="RdBu_r", + node_size=0.3, + node_aspect=None, + arrowhead_size=20, + curved_radius=0.2, + label_fontsize=10, + lag_array=None, + alpha=1.0, + node_label_size=10, + link_label_fontsize=10, + network_lower_bound=0.2, + standard_color_links='black', + standard_color_nodes='lightgrey', +): + """Creates a network plot visualizing the pathways of a mediation analysis. + This is still in beta. The network is defined from non-zero entries in + ``path_val_matrix``. Nodes denote variables, straight links contemporaneous + dependencies and curved arrows lagged dependencies. The node color denotes + the mediated causal effect (MCE) and the link color the value at the lag + with maximal link coefficient. The link label lists the lags with + significant dependency in order of absolute magnitude. The network can also + be plotted over a map drawn before on the same axis. Then the node positions + can be supplied in appropriate axis coordinates via node_pos. + + Parameters + ---------- + path_val_matrix : array_like + Matrix of shape (N, N, tau_max+1) containing link weight values. + path_node_array: array_like + Array of shape (N,) containing node values. + var_names : list, optional (default: None) + List of variable names. If None, range(N) is used. + fig_ax : tuple of figure and axis object, optional (default: None) + Figure and axes instance. If None they are created. + figsize : tuple + Size of figure. + save_name : str, optional (default: None) + Name of figure file to save figure. If None, figure is shown in window. + link_colorbar_label : str, optional (default: 'link coeff. (edge color)') + Link colorbar label. + node_colorbar_label : str, optional (default: 'MCE (node color)') + Node colorbar label. + link_width : array-like, optional (default: None) + Array of val_matrix.shape specifying relative link width with maximum + given by arrow_linewidth. If None, all links have same width. + node_pos : dictionary, optional (default: None) + Dictionary of node positions in axis coordinates of form + node_pos = {'x':array of shape (N,), 'y':array of shape(N)}. These + coordinates could have been transformed before for basemap plots. + arrow_linewidth : float, optional (default: 30) + Linewidth. + vmin_edges : float, optional (default: -1) + Link colorbar scale lower bound. + vmax_edges : float, optional (default: 1) + Link colorbar scale upper bound. + edge_ticks : float, optional (default: 0.4) + Link tick mark interval. + cmap_edges : str, optional (default: 'RdBu_r') + Colormap for links. + vmin_nodes : float, optional (default: 0) + Node colorbar scale lower bound. + vmax_nodes : float, optional (default: 1) + Node colorbar scale upper bound. + node_ticks : float, optional (default: 0.4) + Node tick mark interval. + cmap_nodes : str, optional (default: 'OrRd') + Colormap for links. + node_size : int, optional (default: 0.3) + Node size. + node_aspect : float, optional (default: None) + Ratio between the heigth and width of the varible nodes. + arrowhead_size : int, optional (default: 20) + Size of link arrow head. Passed on to FancyArrowPatch object. 
+ curved_radius, float, optional (default: 0.2) + Curvature of links. Passed on to FancyArrowPatch object. + label_fontsize : int, optional (default: 10) + Fontsize of colorbar labels. + alpha : float, optional (default: 1.) + Opacity. + node_label_size : int, optional (default: 10) + Fontsize of node labels. + link_label_fontsize : int, optional (default: 6) + Fontsize of link labels. + network_lower_bound : float, optional (default: 0.2) + Fraction of vertical space below graph plot. + lag_array : array, optional (default: None) + Optional specification of lags overwriting np.arange(0, tau_max+1) + """ + val_matrix = path_val_matrix + + if fig_ax is None: + fig = pyplot.figure(figsize=figsize) + ax = fig.add_subplot(111, frame_on=False) + else: + fig, ax = fig_ax + + if link_width is not None and not np.all(link_width >= 0.0): + raise ValueError("link_width must be non-negative") + + N, N, dummy = val_matrix.shape + tau_max = dummy - 1 + + if np.count_nonzero(val_matrix) == np.count_nonzero(np.diagonal(val_matrix)): + diagonal = True + else: + diagonal = False + + if np.count_nonzero(val_matrix) == val_matrix.size or diagonal: + val_matrix[0, 1, 0] = 1 + no_links = True + else: + no_links = False + + if var_names is None: + var_names = range(N) + + # Define graph links by absolute maximum (positive or negative like for + # partial correlation) + # val_matrix[np.abs(val_matrix) < sig_thres] = 0. + graph = val_matrix != 0.0 + net = _get_absmax(val_matrix) + G = nx.DiGraph(net) + + node_color = np.zeros(N) + # list of all strengths for color map + all_strengths = [] + # Add attributes, contemporaneous and lagged links are handled separately + for (u, v, dic) in G.edges(data=True): + dic["outer_edge_attribute"] = None + dic["no_links"] = no_links + # average lagfunc for link u --> v ANDOR u -- v + if tau_max > 0: + # argmax of absolute maximum + argmax = np.abs(val_matrix[u, v][1:]).argmax() + 1 + else: + argmax = 0 + if u != v: + # For contemp links masking or finite samples can lead to different + # values for u--v and v--u + # Here we use the maximum for the width and weight (=color) + # of the link + # Draw link if u--v OR v--u at lag 0 is nonzero + # dic['inner_edge'] = ((np.abs(val_matrix[u, v][0]) >= + # sig_thres[u, v][0]) or + # (np.abs(val_matrix[v, u][0]) >= + # sig_thres[v, u][0])) + dic["inner_edge"] = graph[u, v, 0] or graph[v, u, 0] + dic["inner_edge_alpha"] = alpha + # value at argmax of average + if np.abs(val_matrix[u, v][0] - val_matrix[v, u][0]) > 0.0001: + print( + "Contemporaneous I(%d; %d)=%.3f != I(%d; %d)=%.3f" + % (u, v, val_matrix[u, v][0], v, u, val_matrix[v, u][0]) + + " due to conditions, finite sample effects or " + "masking, here edge color = " + "larger (absolute) value." 
+ ) + dic["inner_edge_color"] = _get_absmax( + np.array([[[val_matrix[u, v][0], val_matrix[v, u][0]]]]) + ).squeeze() + if link_width is None: + dic["inner_edge_width"] = arrow_linewidth + else: + dic["inner_edge_width"] = ( + link_width[u, v, 0] / link_width.max() * arrow_linewidth + ) + + all_strengths.append(dic["inner_edge_color"]) + + if tau_max > 0: + # True if ensemble mean at lags > 0 is nonzero + # dic['outer_edge'] = np.any( + # np.abs(val_matrix[u, v][1:]) >= sig_thres[u, v][1:]) + dic["outer_edge"] = np.any(graph[u, v, 1:]) + else: + dic["outer_edge"] = False + dic["outer_edge_alpha"] = alpha + if link_width is None: + # fraction of nonzero values + dic["outer_edge_width"] = arrow_linewidth + else: + dic["outer_edge_width"] = ( + link_width[u, v, argmax] / link_width.max() * arrow_linewidth + ) + + # value at argmax of average + dic["outer_edge_color"] = val_matrix[u, v][argmax] + all_strengths.append(dic["outer_edge_color"]) + + # Sorted list of significant lags (only if robust wrt + # d['min_ensemble_frac']) + if tau_max > 0: + lags = np.abs(val_matrix[u, v][1:]).argsort()[::-1] + 1 + sig_lags = (np.where(graph[u, v, 1:])[0] + 1).tolist() + else: + lags, sig_lags = [], [] + if lag_array is not None: + dic["label"] = str([lag_array[l] for l in lags if l in sig_lags])[1:-1] + else: + dic["label"] = str([l for l in lags if l in sig_lags])[1:-1] + else: + # Node color is max of average autodependency + node_color[u] = val_matrix[u, v][argmax] + + # dic['outer_edge_edge'] = False + # dic['outer_edge_edgecolor'] = None + # dic['inner_edge_edge'] = False + # dic['inner_edge_edgecolor'] = None + + node_color = path_node_array + # print node_color + # If no links are present, set value to zero + if len(all_strengths) == 0: + all_strengths = [0.0] + + if node_pos is None: + pos = nx.circular_layout(deepcopy(G)) + # pos = nx.spring_layout(deepcopy(G)) + else: + pos = {} + for i in range(N): + pos[i] = (node_pos["x"][i], node_pos["y"][i]) + + node_rings = { + 0: { + "sizes": None, + "color_array": node_color, + "cmap": cmap_nodes, + "vmin": vmin_nodes, + "vmax": vmax_nodes, + "ticks": node_ticks, + "label": node_colorbar_label, + "colorbar": True, + } + } + + _draw_network_with_curved_edges( + fig=fig, + ax=ax, + G=deepcopy(G), + pos=pos, + # dictionary of rings: {0:{'sizes':(N,)-array, 'color_array':(N,)-array + # or None, 'cmap':string, + node_rings=node_rings, + # 'vmin':float or None, 'vmax':float or None, 'label':string or None}} + node_labels=var_names, + node_label_size=node_label_size, + node_alpha=alpha, + standard_size=node_size, + node_aspect=node_aspect, + standard_cmap="OrRd", + standard_color_nodes=standard_color_nodes, + standard_color_links=standard_color_links, + log_sizes=False, + cmap_links=cmap_edges, + links_vmin=vmin_edges, + links_vmax=vmax_edges, + links_ticks=edge_ticks, + # cmap_links_edges='YlOrRd', links_edges_vmin=-1., links_edges_vmax=1., + # links_edges_ticks=.2, link_edge_colorbar_label='link_edge', + arrowhead_size=arrowhead_size, + curved_radius=curved_radius, + label_fontsize=label_fontsize, + link_label_fontsize=link_label_fontsize, + link_colorbar_label=link_colorbar_label, + network_lower_bound=network_lower_bound, + # label_fraction=label_fraction, + # inner_edge_style=inner_edge_style + ) + + # fig.subplots_adjust(left=0.1, right=.9, bottom=.25, top=.95) + # savestring = os.path.expanduser(save_name) + if save_name is not None: + pyplot.savefig(save_name) + else: + pyplot.show()
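For reference, a similarly hand-made call to plot_mediation_graph; in practice path_val_matrix and path_node_array would come from a mediation analysis (for example the LinearMediation class), but the shapes follow the docstring and the concrete values below are illustrative assumptions:

import numpy as np
from tigramite import plotting as tp

N, tau_max = 3, 2
path_val_matrix = np.zeros((N, N, tau_max + 1))
path_val_matrix[0, 1, 1] = 0.7   # X0 --> X1 at lag 1
path_val_matrix[1, 2, 2] = 0.4   # X1 --> X2 at lag 2

# Mediated causal effect per variable; here X1 acts as the mediator.
path_node_array = np.array([0.0, 0.35, 0.0])

tp.plot_mediation_graph(
    path_val_matrix=path_val_matrix,
    path_node_array=path_node_array,
    var_names=["X0", "X1", "X2"],
    save_name="mediation_graph.pdf",   # or None to show the figure in a window
)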
+ + +# +# Functions to plot time series graphs from links including ancestors +# +
[docs]def plot_tsg(links, X, Y, Z=None, anc_x=None, anc_y=None, anc_xy=None): + """Plots TSG that is input in format (N*max_lag, N*max_lag). + Compared to the tigramite plotting function here links + X^i_{t-tau} --> X^j_t can be missing for different t'. Helpful to + visualize the conditioned TSG. + """ + + def varlag2node(var, lag): + """Translate from (var, lag) notation to node in TSG. + lag must be <= 0. + """ + return var * max_lag + lag + + def node2varlag(node): + """Translate from node in TSG to (var, -tau) notation. + Here tau is <= 0. + """ + var = node // max_lag + tau = node % (max_lag) - (max_lag - 1) + return var, tau + + def _get_minmax_lag(links): + """Helper function to retrieve tau_min and tau_max from links + """ + + N = len(links) + + # Get maximum time lag + min_lag = np.inf + max_lag = 0 + for j in range(N): + for link_props in links[j]: + var, lag = link_props[0] + coeff = link_props[1] + # func = link_props[2] + if coeff != 0.: + min_lag = min(min_lag, abs(lag)) + max_lag = max(max_lag, abs(lag)) + return min_lag, max_lag + + def _links_to_tsg(link_coeffs, max_lag=None): + """Transform link_coeffs to time series graph. + TSG is of shape (N*max_lag, N*max_lag). + """ + N = len(link_coeffs) + + # Get maximum lag + min_lag_links, max_lag_links = _get_minmax_lag(link_coeffs) + + # max_lag of TSG is max lag in links + 1 for the zero lag. + if max_lag is None: + max_lag = max_lag_links + 1 + + tsg = np.zeros((N * max_lag, N * max_lag)) + + for j in range(N): + for link_props in link_coeffs[j]: + i, lag = link_props[0] + tau = abs(lag) + coeff = link_props[1] + # func = link_props[2] + if coeff != 0.0: + for t in range(max_lag): + if ( + 0 <= varlag2node(i, t - tau) + and varlag2node(i, t - tau) % max_lag + <= varlag2node(j, t) % max_lag + ): + tsg[varlag2node(i, t - tau), varlag2node(j, t)] = 1.0 + + return tsg + + color_list = ["lightgrey", "grey", "black", "red", "blue", "orange"] + listcmap = ListedColormap(color_list) + + N = len(links) + + min_lag_links, max_lag_links = _get_minmax_lag(links) + max_lag = max_lag_links + + for anc in X + Y: + max_lag = max(max_lag, abs(anc[1])) + for anc in Y: + max_lag = max(max_lag, abs(anc[1])) + if Z is not None: + for anc in Z: + max_lag = max(max_lag, abs(anc[1])) + + if anc_x is not None: + for anc in anc_x: + max_lag = max(max_lag, abs(anc[1])) + if anc_y is not None: + for anc in anc_y: + max_lag = max(max_lag, abs(anc[1])) + if anc_xy is not None: + for anc in anc_xy: + max_lag = max(max_lag, abs(anc[1])) + + max_lag = max_lag + 1 + + tsg = _links_to_tsg(links, max_lag=max_lag) + + G = nx.DiGraph(tsg) + + figsize = (3, 3) + link_colorbar_label = "MCI" + arrow_linewidth = 8.0 + vmin_edges = -1 + vmax_edges = 1.0 + edge_ticks = 0.4 + cmap_edges = "RdBu_r" + order = None + node_size = .1 + arrowhead_size = 20 + curved_radius = 0.2 + label_fontsize = 10 + alpha = 1.0 + node_label_size = 10 + label_space_left = 0.1 + label_space_top = 0.0 + network_lower_bound = 0.2 + inner_edge_style = "dashed" + + node_color = np.ones(N * max_lag) # , dtype = 'object') + node_color[:] = 0 + + if anc_x is not None: + for n in [varlag2node(itau[0], max_lag - 1 + itau[1]) for itau in anc_x]: + node_color[n] = 3 + if anc_y is not None: + for n in [varlag2node(itau[0], max_lag - 1 + itau[1]) for itau in anc_y]: + node_color[n] = 4 + if anc_xy is not None: + for n in [varlag2node(itau[0], max_lag - 1 + itau[1]) for itau in anc_xy]: + node_color[n] = 5 + + for x in X: + node_color[varlag2node(x[0], max_lag - 1 + x[1])] = 2 + for y in Y: + 
node_color[varlag2node(y[0], max_lag - 1 + y[1])] = 2 + if Z is not None: + for z in Z: + node_color[varlag2node(z[0], max_lag - 1 + z[1])] = 1 + + fig = pyplot.figure(figsize=figsize) + ax = fig.add_subplot(111, frame_on=False) + var_names = range(N) + order = range(N) + + # list of all strengths for color map + all_strengths = [] + # Add attributes, contemporaneous and lagged links are handled separately + for (u, v, dic) in G.edges(data=True): + if u != v: + if tsg[u, v] and tsg[v, u]: + dic["inner_edge"] = True + dic["outer_edge"] = False + else: + dic["inner_edge"] = False + dic["outer_edge"] = True + + dic["inner_edge_alpha"] = alpha + dic["inner_edge_color"] = tsg[u, v] + + dic["inner_edge_width"] = arrow_linewidth + dic["inner_edge_attribute"] = dic["outer_edge_attribute"] = None + + all_strengths.append(dic["inner_edge_color"]) + dic["outer_edge_alpha"] = alpha + dic["outer_edge_width"] = dic["inner_edge_width"] = arrow_linewidth + + # value at argmax of average + dic["outer_edge_color"] = tsg[u, v] + + all_strengths.append(dic["outer_edge_color"]) + dic["label"] = None + + # If no links are present, set value to zero + if len(all_strengths) == 0: + all_strengths = [0.0] + + posarray = np.zeros((N * max_lag, 2)) + for i in range(N * max_lag): + posarray[i] = np.array([(i % max_lag), (1.0 - i // max_lag)]) + + pos_tmp = {} + for i in range(N * max_lag): + pos_tmp[i] = np.array( + [ + ((i % max_lag) - posarray.min(axis=0)[0]) + / (posarray.max(axis=0)[0] - posarray.min(axis=0)[0]), + ((1.0 - i // max_lag) - posarray.min(axis=0)[1]) + / (posarray.max(axis=0)[1] - posarray.min(axis=0)[1]), + ] + ) + pos_tmp[i][np.isnan(pos_tmp[i])] = 0.0 + + pos = {} + for n in range(N): + for tau in range(max_lag): + pos[n * max_lag + tau] = pos_tmp[order[n] * max_lag + tau] + + node_rings = { + 0: { + "sizes": None, + "color_array": node_color, + "label": "", + "colorbar": False, + "cmap": listcmap, + "vmin": 0, + "vmax": len(color_list), + } + } + + node_labels = ["" for i in range(N * max_lag)] + + _draw_network_with_curved_edges( + fig=fig, + ax=ax, + G=deepcopy(G), + pos=pos, + node_rings=node_rings, + node_labels=node_labels, + node_label_size=node_label_size, + node_alpha=alpha, + standard_size=node_size, + node_aspect=None, + standard_cmap="OrRd", + standard_color_links='black', + standard_color_nodes='lightgrey', + log_sizes=False, + cmap_links=cmap_edges, + links_vmin=vmin_edges, + links_vmax=vmax_edges, + links_ticks=edge_ticks, + arrowstyle="simple", + arrowhead_size=arrowhead_size, + curved_radius=curved_radius, + label_fontsize=label_fontsize, + label_fraction=0.5, + link_colorbar_label=link_colorbar_label, + inner_edge_curved=True, + network_lower_bound=network_lower_bound, + inner_edge_style=inner_edge_style, + ) + + for i in range(N): + trans = transforms.blended_transform_factory(fig.transFigure, ax.transData) + ax.text( + label_space_left, + pos[order[i] * max_lag][1], + "%s" % str(var_names[order[i]]), + fontsize=label_fontsize, + horizontalalignment="left", + verticalalignment="center", + transform=trans, + ) + + for tau in np.arange(max_lag - 1, -1, -1): + trans = transforms.blended_transform_factory(ax.transData, fig.transFigure) + if tau == max_lag - 1: + ax.text( + pos[tau][0], + 1.0 - label_space_top, + r"$t$", + fontsize=int(label_fontsize * 0.7), + horizontalalignment="center", + verticalalignment="top", + transform=trans, + ) + else: + ax.text( + pos[tau][0], + 1.0 - label_space_top, + r"$t-%s$" % str(max_lag - tau - 1), + fontsize=int(label_fontsize * 0.7), + 
horizontalalignment="center", + verticalalignment="top", + transform=trans, + ) + + return fig, ax
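A short sketch of calling plot_tsg directly with a hand-written link dictionary; the {j: [((i, -tau), coeff), ...]} format and the (variable, lag) convention for X, Y, Z are read off the function body above, and all concrete values and file names are illustrative assumptions:

from tigramite import plotting as tp

# Link dictionary: {effect j: [((cause i, -tau), coefficient), ...]}
links = {
    0: [((0, -1), 0.8)],                   # X0 autodependency at lag 1
    1: [((1, -1), 0.7), ((0, -1), 0.5)],   # X1 autodependency and X0 at t-1 --> X1 at t
}

# Highlight X = X0 at t-1 and Y = X1 at t, conditioning on Z = X1 at t-1.
fig, ax = tp.plot_tsg(
    links,
    X=[(0, -1)],
    Y=[(1, 0)],
    Z=[(1, -1)],
)
fig.savefig("conditioned_tsg.pdf")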
+
+
+if __name__ == "__main__":
+
+    val_matrix = np.zeros((4, 4, 3))
+
+    # Complete test case
+    graph = np.zeros((3,3,2), dtype='<U3')
+
+    graph[0, 1, 0] = "<-+"
+    graph[1, 0, 0] = "+->"
+
+    # graph[1, 2, 0] = "x->"
+    # graph[2, 1, 0] = "<-x"
+
+    # graph[0, 2, 0] = "x->"
+    # graph[2, 0, 0] = "<-x"
+    nolinks = np.zeros(graph.shape)
+    # nolinks[range(4), range(4), 1] = 1
+
+    # plot_time_series_graph(graph=nolinks)
+    plot_graph(graph=graph,
+               save_name="/home/rung_ja/Downloads/tsg_test.pdf")
+
+    # pyplot.show()
+
+ +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/docs/_build/html/_sources/index.rst.txt b/docs/_build/html/_sources/index.rst.txt new file mode 100644 index 00000000..ff22f4b8 --- /dev/null +++ b/docs/_build/html/_sources/index.rst.txt @@ -0,0 +1,150 @@ +.. Tigramite documentation master file, created by + sphinx-quickstart on Thu May 11 18:32:05 2017. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +TIGRAMITE +========= + +`Github repo `_ + +Tigramite is a causal time series analysis python package. It allows to efficiently estimate causal graphs from high-dimensional time series datasets (causal discovery) and to use these graphs for robust forecasting and the estimation and prediction of direct, total, and mediated effects. Causal discovery is based on linear as well as non-parametric conditional independence tests applicable to discrete or continuously-valued time series. Also includes functions for high-quality plots of the results. Please cite the following papers depending on which method you use: + + +- PCMCI: J. Runge, P. Nowack, M. Kretschmer, S. Flaxman, D. Sejdinovic, Detecting and quantifying causal associations in large nonlinear time series datasets. Sci. Adv. 5, eaau4996 (2019). https://advances.sciencemag.org/content/5/11/eaau4996 + +- PCMCI+: J. Runge (2020): Discovering contemporaneous and lagged causal relations in autocorrelated nonlinear time series datasets. Proceedings of the 36th Conference on Uncertainty in Artificial Intelligence, UAI 2020,Toronto, Canada, 2019, AUAI Press, 2020. http://auai.org/uai2020/proceedings/579_main_paper.pdf + +- LPCMCI: Gerhardus, A. & Runge, J. High-recall causal discovery for autocorrelated time series with latent confounders Advances in Neural Information Processing Systems, 2020, 33. https://proceedings.neurips.cc/paper/2020/hash/94e70705efae423efda1088614128d0b-Abstract.html + +- Generally: J. Runge (2018): Causal Network Reconstruction from Time Series: From Theoretical Assumptions to Practical Estimation. Chaos: An Interdisciplinary Journal of Nonlinear Science 28 (7): 075310. https://aip.scitation.org/doi/10.1063/1.5025050 + +- Nature Communications Perspective paper: https://www.nature.com/articles/s41467-019-10105-3 + +- Causal effects: J. Runge, Necessary and sufficient graphical conditions for optimal adjustment sets in causal graphical models with hidden variables, Advances in Neural Information Processing Systems, 2021, 34 + +- Mediation class: J. Runge et al. (2015): Identifying causal gateways and mediators in complex spatio-temporal systems. Nature Communications, 6, 8502. http://doi.org/10.1038/ncomms9502 + +- Mediation class: J. Runge (2015): Quantifying information transfer and mediation along causal pathways in complex systems. Phys. Rev. E, 92(6), 62829. http://doi.org/10.1103/PhysRevE.92.062829 + +- CMIknn: J. Runge (2018): Conditional Independence Testing Based on a Nearest-Neighbor Estimator of Conditional Mutual Information. In Proceedings of the 21st International Conference on Artificial Intelligence and Statistics. http://proceedings.mlr.press/v84/runge18a.html + + + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + +.. 
autosummary:: + + tigramite.pcmci.PCMCI + tigramite.lpcmci.LPCMCI + tigramite.independence_tests.CondIndTest + tigramite.independence_tests.ParCorr + tigramite.independence_tests.GPDC + tigramite.independence_tests.GPDCtorch + tigramite.independence_tests.CMIknn + tigramite.independence_tests.CMIsymb + tigramite.independence_tests.OracleCI + tigramite.causal_effects.CausalEffects + tigramite.models.Models + tigramite.models.LinearMediation + tigramite.models.Prediction + tigramite.data_processing + tigramite.toymodels.structural_causal_processes + tigramite.plotting + + +:mod:`tigramite.pcmci`: PCMCI +=========================================== + +.. autoclass:: tigramite.pcmci.PCMCI + :members: + + +:mod:`tigramite.lpcmci`: LPCMCI +=========================================== + +.. autoclass:: tigramite.lpcmci.LPCMCI + :members: + + +:mod:`tigramite.independence_tests`: Conditional independence tests +================================================================================= + +Base class: + +.. autoclass:: tigramite.independence_tests.CondIndTest + :members: + +Test statistics: + +.. autoclass:: tigramite.independence_tests.ParCorr + :members: + +.. autoclass:: tigramite.independence_tests.GPDC + :members: + +.. autoclass:: tigramite.independence_tests.GPDCtorch + :members: + +.. autoclass:: tigramite.independence_tests.CMIknn + :members: + +.. autoclass:: tigramite.independence_tests.CMIsymb + :members: + +.. autoclass:: tigramite.independence_tests.OracleCI + :members: + + +:mod:`tigramite.causal_effects`: Causal Effect analysis +=========================================================== + +.. autoclass:: tigramite.causal_effects.CausalEffects + :members: + + +:mod:`tigramite.models`: Time series modeling, mediation, and prediction +======================================================================== + +Base class: + +.. autoclass:: tigramite.models.Models + :members: + +Derived classes: + +.. autoclass:: tigramite.models.LinearMediation + :members: + +.. autoclass:: tigramite.models.Prediction + :members: + + +:mod:`tigramite.data_processing`: Data processing functions +=========================================================== + +.. automodule:: tigramite.data_processing + :members: + + +:mod:`tigramite.toymodels`: Toy model generators +=========================================================== + +.. automodule:: tigramite.toymodels + :members: + + +:mod:`tigramite.plotting`: Plotting functions +============================================= + +.. 
automodule:: tigramite.plotting + :members: + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/_build/html/_static/ajax-loader.gif b/docs/_build/html/_static/ajax-loader.gif new file mode 100644 index 0000000000000000000000000000000000000000..61faf8cab23993bd3e1560bff0668bd628642330 GIT binary patch literal 673 zcmZ?wbhEHb6krfw_{6~Q|Nno%(3)e{?)x>&1u}A`t?OF7Z|1gRivOgXi&7IyQd1Pl zGfOfQ60;I3a`F>X^fL3(@);C=vM_KlFfb_o=k{|A33hf2a5d61U}gjg=>Rd%XaNQW zW@Cw{|b%Y*pl8F?4B9 zlo4Fz*0kZGJabY|>}Okf0}CCg{u4`zEPY^pV?j2@h+|igy0+Kz6p;@SpM4s6)XEMg z#3Y4GX>Hjlml5ftdH$4x0JGdn8~MX(U~_^d!Hi)=HU{V%g+mi8#UGbE-*ao8f#h+S z2a0-5+vc7MU$e-NhmBjLIC1v|)9+Im8x1yacJ7{^tLX(ZhYi^rpmXm0`@ku9b53aN zEXH@Y3JaztblgpxbJt{AtE1ad1Ca>{v$rwwvK(>{m~Gf_=-Ro7Fk{#;i~+{{>QtvI yb2P8Zac~?~=sRA>$6{!(^3;ZP0TPFR(G_-UDU(8Jl0?(IXu$~#4A!880|o%~Al1tN literal 0 HcmV?d00001 diff --git a/docs/_build/html/_static/alabaster.css b/docs/_build/html/_static/alabaster.css new file mode 100644 index 00000000..bc420a48 --- /dev/null +++ b/docs/_build/html/_static/alabaster.css @@ -0,0 +1,593 @@ + + + + + + + + + + + + + + + + + +@import url("basic.css"); + +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif; + font-size: 17px; + background-color: white; + color: #000; + margin: 0; + padding: 0; +} + +div.document { + width: 940px; + margin: 30px auto 0 auto; +} + +div.documentwrapper { + float: left; + width: 100%; +} + +div.bodywrapper { + margin: 0 0 0 220px; +} + +div.sphinxsidebar { + width: 220px; +} + +hr { + border: 1px solid #B1B4B6; +} + +div.body { + background-color: #ffffff; + color: #3E4349; + padding: 0 30px 0 30px; +} + +div.body > .section { + text-align: left; +} + +div.footer { + width: 940px; + margin: 20px auto 30px auto; + font-size: 14px; + color: #888; + text-align: right; +} + +div.footer a { + color: #888; +} + + +div.relations { + display: none; +} + + +div.sphinxsidebar a { + color: #444; + text-decoration: none; + border-bottom: 1px dotted #999; +} + +div.sphinxsidebar a:hover { + border-bottom: 1px solid #999; +} + +div.sphinxsidebar { + font-size: 14px; + line-height: 1.5; +} + +div.sphinxsidebarwrapper { + padding: 18px 10px; +} + +div.sphinxsidebarwrapper p.logo { + padding: 0; + margin: -10px 0 0 0px; + text-align: center; +} + +div.sphinxsidebarwrapper h1.logo { + margin-top: -10px; + text-align: center; + margin-bottom: 5px; + text-align: left; +} + +div.sphinxsidebarwrapper h1.logo-name { + margin-top: 0px; +} + +div.sphinxsidebarwrapper p.blurb { + margin-top: 0; + font-style: normal; +} + +div.sphinxsidebar h3, +div.sphinxsidebar h4 { + font-family: 'Garamond', 'Georgia', serif; + color: #444; + font-size: 24px; + font-weight: normal; + margin: 0 0 5px 0; + padding: 0; +} + +div.sphinxsidebar h4 { + font-size: 20px; +} + +div.sphinxsidebar h3 a { + color: #444; +} + +div.sphinxsidebar p.logo a, +div.sphinxsidebar h3 a, +div.sphinxsidebar p.logo a:hover, +div.sphinxsidebar h3 a:hover { + border: none; +} + +div.sphinxsidebar p { + color: #555; + margin: 10px 0; +} + +div.sphinxsidebar ul { + margin: 10px 0; + padding: 0; + color: #000; +} + +div.sphinxsidebar ul li.toctree-l1 > a { + font-size: 120%; +} + +div.sphinxsidebar ul li.toctree-l2 > a { + font-size: 110%; +} + +div.sphinxsidebar input { + border: 1px solid #CCC; + font-family: 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif; + 
font-size: 1em; +} + +div.sphinxsidebar hr { + border: none; + height: 1px; + color: #AAA; + background: #AAA; + + text-align: left; + margin-left: 0; + width: 50%; +} + +/* -- body styles ----------------------------------------------------------- */ + +a { + color: #004B6B; + text-decoration: underline; +} + +a:hover { + color: #6D4100; + text-decoration: underline; +} + +div.body h1, +div.body h2, +div.body h3, +div.body h4, +div.body h5, +div.body h6 { + font-family: 'Garamond', 'Georgia', serif; + font-weight: normal; + margin: 30px 0px 10px 0px; + padding: 0; +} + +div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; } +div.body h2 { font-size: 180%; } +div.body h3 { font-size: 150%; } +div.body h4 { font-size: 130%; } +div.body h5 { font-size: 100%; } +div.body h6 { font-size: 100%; } + +a.headerlink { + color: #DDD; + padding: 0 4px; + text-decoration: none; +} + +a.headerlink:hover { + color: #444; + background: #EAEAEA; +} + +div.body p, div.body dd, div.body li { + line-height: 1.4em; +} + +div.admonition { + margin: 20px 0px; + padding: 10px 30px; + background-color: #FCC; + border: 1px solid #FAA; +} + +div.admonition tt.xref, div.admonition a tt { + border-bottom: 1px solid #fafafa; +} + +dd div.admonition { + margin-left: -60px; + padding-left: 60px; +} + +div.admonition p.admonition-title { + font-family: 'Garamond', 'Georgia', serif; + font-weight: normal; + font-size: 24px; + margin: 0 0 10px 0; + padding: 0; + line-height: 1; +} + +div.admonition p.last { + margin-bottom: 0; +} + +div.highlight { + background-color: white; +} + +dt:target, .highlight { + background: #FAF3E8; +} + +div.note { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.seealso { + background-color: #EEE; + border: 1px solid #CCC; +} + +div.topic { + background-color: #eee; +} + +p.admonition-title { + display: inline; +} + +p.admonition-title:after { + content: ":"; +} + +pre, tt, code { + font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace; + font-size: 0.9em; +} + +.hll { + background-color: #FFC; + margin: 0 -12px; + padding: 0 12px; + display: block; +} + +img.screenshot { +} + +tt.descname, tt.descclassname, code.descname, code.descclassname { + font-size: 0.95em; +} + +tt.descname, code.descname { + padding-right: 0.08em; +} + +img.screenshot { + -moz-box-shadow: 2px 2px 4px #eee; + -webkit-box-shadow: 2px 2px 4px #eee; + box-shadow: 2px 2px 4px #eee; +} + +table.docutils { + border: 1px solid #888; + -moz-box-shadow: 2px 2px 4px #eee; + -webkit-box-shadow: 2px 2px 4px #eee; + box-shadow: 2px 2px 4px #eee; +} + +table.docutils td, table.docutils th { + border: 1px solid #888; + padding: 0.25em 0.7em; +} + +table.field-list, table.footnote { + border: none; + -moz-box-shadow: none; + -webkit-box-shadow: none; + box-shadow: none; +} + +table.footnote { + margin: 15px 0; + width: 100%; + border: 1px solid #EEE; + background: #FDFDFD; + font-size: 0.9em; +} + +table.footnote + table.footnote { + margin-top: -15px; + border-top: none; +} + +table.field-list th { + padding: 0 0.8em 0 0; +} + +table.field-list td { + padding: 0; +} + +table.field-list p { + margin-bottom: 0.8em; +} + +table.footnote td.label { + width: 0px; + padding: 0.3em 0 0.3em 0.5em; +} + +table.footnote td { + padding: 0.3em 0.5em; +} + +dl { + margin: 0; + padding: 0; +} + +dl dd { + margin-left: 30px; +} + +blockquote { + margin: 0 0 0 30px; + padding: 0; +} + +ul, ol { + margin: 10px 0 10px 30px; + padding: 0; +} + +pre { + background: #EEE; + padding: 7px 30px; + 
margin: 15px 0px; + line-height: 1.3em; +} + +dl pre, blockquote pre, li pre { + margin-left: 0; + padding-left: 30px; +} + +dl dl pre { + margin-left: -90px; + padding-left: 90px; +} + +tt, code { + background-color: #ecf0f3; + color: #222; + /* padding: 1px 2px; */ +} + +tt.xref, code.xref, a tt { + background-color: #FBFBFB; + border-bottom: 1px solid white; +} + +a.reference { + text-decoration: none; + border-bottom: 1px dotted #004B6B; +} + +a.reference:hover { + border-bottom: 1px solid #6D4100; +} + +a.footnote-reference { + text-decoration: none; + font-size: 0.7em; + vertical-align: top; + border-bottom: 1px dotted #004B6B; +} + +a.footnote-reference:hover { + border-bottom: 1px solid #6D4100; +} + +a:hover tt, a:hover code { + background: #EEE; +} + + +@media screen and (max-width: 870px) { + + div.sphinxsidebar { + display: none; + } + + div.document { + width: 100%; + + } + + div.documentwrapper { + margin-left: 0; + margin-top: 0; + margin-right: 0; + margin-bottom: 0; + } + + div.bodywrapper { + margin-top: 0; + margin-right: 0; + margin-bottom: 0; + margin-left: 0; + } + + ul { + margin-left: 0; + } + + .document { + width: auto; + } + + .footer { + width: auto; + } + + .bodywrapper { + margin: 0; + } + + .footer { + width: auto; + } + + .github { + display: none; + } + + + +} + + + +@media screen and (max-width: 875px) { + + body { + margin: 0; + padding: 20px 30px; + } + + div.documentwrapper { + float: none; + background: white; + } + + div.sphinxsidebar { + display: block; + float: none; + width: 102.5%; + margin: 50px -30px -20px -30px; + padding: 10px 20px; + background: #333; + color: #FFF; + } + + div.sphinxsidebar h3, div.sphinxsidebar h4, div.sphinxsidebar p, + div.sphinxsidebar h3 a { + color: white; + } + + div.sphinxsidebar a { + color: #AAA; + } + + div.sphinxsidebar p.logo { + display: none; + } + + div.document { + width: 100%; + margin: 0; + } + + div.footer { + display: none; + } + + div.bodywrapper { + margin: 0; + } + + div.body { + min-height: 0; + padding: 0; + } + + .rtd_doc_footer { + display: none; + } + + .document { + width: auto; + } + + .footer { + width: auto; + } + + .footer { + width: auto; + } + + .github { + display: none; + } +} + + +/* misc. */ + +.revsys-inline { + display: none!important; +} + +/* Make nested-list/multi-paragraph items look better in Releases changelog + * pages. Without this, docutils' magical list fuckery causes inconsistent + * formatting between different release sub-lists. + */ +div#changelog > div.section > ul > li > p:only-child { + margin-bottom: 0; +} + +/* Hide fugly table cell borders in ..bibliography:: directive output */ +table.docutils.citation, table.docutils.citation td, table.docutils.citation th { + border: none; + /* Below needed in some edge cases; if not applied, bottom shadows appear */ + -moz-box-shadow: none; + -webkit-box-shadow: none; + box-shadow: none; +} \ No newline at end of file diff --git a/docs/_build/html/_static/basic.css b/docs/_build/html/_static/basic.css new file mode 100644 index 00000000..dc88b5a2 --- /dev/null +++ b/docs/_build/html/_static/basic.css @@ -0,0 +1,632 @@ +/* + * basic.css + * ~~~~~~~~~ + * + * Sphinx stylesheet -- basic theme. + * + * :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. 
+ * + */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox input[type="text"] { + width: 170px; +} + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin: 10px 0 0 20px; + padding: 0; +} + +ul.search li { + padding: 5px 0 5px 20px; + background-image: url(file.png); + background-repeat: no-repeat; + background-position: 0 7px; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li div.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body p, div.body dd, div.body li, div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, 
+dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px 7px 0 7px; + background-color: #ffe; + width: 40%; + float: right; +} + +p.sidebar-title { + font-weight: bold; +} + +/* -- topics ---------------------------------------------------------------- */ + +div.topic { + border: 1px solid #ccc; + padding: 7px 7px 0 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +div.admonition dl { + margin-bottom: 0; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + border: 0; + border-collapse: collapse; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +table.footnote td, table.footnote th { + border: 0 !important; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text { +} + +/* -- field list styles ----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +dl { + margin-bottom: 15px; +} + +dd p { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +dt:target, .highlighted { + background-color: 
#fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; +} + +td.linenos pre { + padding: 5px 0px; + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + margin-left: 0.5em; +} + +table.highlighttable td { + padding: 0 0.5em 0 0.5em; +} + +div.code-block-caption { + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +div.code-block-caption + div > div.highlight > pre { + margin-top: 0; +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + padding: 1em 1em 0; +} + +div.literal-block-wrapper div.highlight { + margin: 0; +} + +code.descname { + background-color: transparent; + font-weight: bold; + font-size: 1.2em; +} + +code.descclassname { + background-color: transparent; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: relative; + left: 0px; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file diff --git a/docs/_build/html/_static/comment-bright.png b/docs/_build/html/_static/comment-bright.png new file mode 100644 index 0000000000000000000000000000000000000000..15e27edb12ac25701ac0ac21b97b52bb4e45415e GIT binary patch literal 756 zcmVgfIX78 z$8Pzv({A~p%??+>KickCb#0FM1rYN=mBmQ&Nwp<#JXUhU;{|)}%&s>suq6lXw*~s{ zvHx}3C%<;wE5CH!BR{p5@ml9ws}y)=QN-kL2?#`S5d*6j zk`h<}j1>tD$b?4D^N9w}-k)bxXxFg>+#kme^xx#qg6FI-%iv2U{0h(Y)cs%5a|m%Pn_K3X_bDJ>EH#(Fb73Z zfUt2Q3B>N+ot3qb*DqbTZpFIn4a!#_R-}{?-~Hs=xSS6p&$sZ-k1zDdtqU`Y@`#qL z&zv-~)Q#JCU(dI)Hf;$CEnK=6CK50}q7~wdbI->?E07bJ0R;!GSQTs5Am`#;*WHjvHRvY?&$Lm-vq1a_BzocI^ULXV!lbMd%|^B#fY;XX)n<&R^L 
z=84u1e_3ziq;Hz-*k5~zwY3*oDKt0;bM@M@@89;@m*4RFgvvM_4;5LB!@OB@^WbVT zjl{t;a8_>od-~P4 m{5|DvB&z#xT;*OnJqG}gk~_7HcNkCr0000W zanA~u9RIXo;n7c96&U)YLgs-FGlx~*_c{Jgvesu1E5(8YEf&5wF=YFPcRe@1=MJmi zag(L*xc2r0(slpcN!vC5CUju;vHJkHc*&70_n2OZsK%O~A=!+YIw z7zLLl7~Z+~RgWOQ=MI6$#0pvpu$Q43 zP@36QAmu6!_9NPM?o<1_!+stoVRRZbW9#SPe!n;#A_6m8f}|xN1;H{`0RoXQ2LM47 zt(g;iZ6|pCb@h2xk&(}S3=EVBUO0e90m2Lp5CB<(SPIaB;n4))3JB87Or#XPOPcum z?<^(g+m9}VNn4Y&B`g8h{t_$+RB1%HKRY6fjtd-<7&EsU;vs0GM(Lmbhi%Gwcfs0FTF}T zL{_M6Go&E0Eg8FuB*(Yn+Z*RVTBE@10eIOb3El^MhO`GabDll(V0&FlJi2k^;q8af zkENdk2}x2)_KVp`5OAwXZM;dG0?M-S)xE1IKDi6BY@5%Or?#aZ9$gcX)dPZ&wA1a< z$rFXHPn|TBf`e?>Are8sKtKrKcjF$i^lp!zkL?C|y^vlHr1HXeVJd;1I~g&Ob-q)& z(fn7s-KI}G{wnKzg_U5G(V%bX6uk zIa+<@>rdmZYd!9Y=C0cuchrbIjuRB_Wq{-RXlic?flu1*_ux}x%(HDH&nT`k^xCeC ziHi1!ChH*sQ6|UqJpTTzX$aw8e(UfcS^f;6yBWd+(1-70zU(rtxtqR%j z-lsH|CKQJXqD{+F7V0OTv8@{~(wp(`oIP^ZykMWgR>&|RsklFMCnOo&Bd{le} zV5F6424Qzl;o2G%oVvmHgRDP9!=rK8fy^!yV8y*4p=??uIRrrr0?>O!(z*g5AvL2!4z0{sq%vhG*Po}`a<6%kTK5TNhtC8}rXNu&h^QH4A&Sk~Autm*s~45(H7+0bi^MraaRVzr05hQ3iK?j` zR#U@^i0WhkIHTg29u~|ypU?sXCQEQgXfObPW;+0YAF;|5XyaMAEM0sQ@4-xCZe=0e z7r$ofiAxn@O5#RodD8rh5D@nKQ;?lcf@tg4o+Wp44aMl~c47azN_(im0N)7OqdPBC zGw;353_o$DqGRDhuhU$Eaj!@m000000NkvXXu0mjfjZ7Z_ literal 0 HcmV?d00001 diff --git a/docs/_build/html/_static/contents.png b/docs/_build/html/_static/contents.png new file mode 100644 index 0000000000000000000000000000000000000000..6c59aa1f9c8c3b754b258b8ab4f6b95971c99109 GIT binary patch literal 107 zcmeAS@N?(olHy`uVBq!ia0vp^j6kfx!2~2XTwzxLQbwLGjv*C{Q@c%>8XN?UO#1VG zcLb|!+10i0Jzf{Gv>fyFaQYL)bKk!I{mJd!3^2Uu$-u=wds-dX_E&EV= 0 && !jQuery(node.parentNode).hasClass(className)) { + var span = document.createElement("span"); + span.className = className; + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + node.parentNode.insertBefore(span, node.parentNode.insertBefore( + document.createTextNode(val.substr(pos + text.length)), + node.nextSibling)); + node.nodeValue = val.substr(0, pos); + } + } + else if (!jQuery(node).is("button, select, textarea")) { + jQuery.each(node.childNodes, function() { + highlight(this); + }); + } + } + return this.each(function() { + highlight(this); + }); +}; + +/* + * backward compatibility for jQuery.browser + * This will be supported until firefox bug is fixed. + */ +if (!jQuery.browser) { + jQuery.uaMatch = function(ua) { + ua = ua.toLowerCase(); + + var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || + /(webkit)[ \/]([\w.]+)/.exec(ua) || + /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) || + /(msie) ([\w.]+)/.exec(ua) || + ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) || + []; + + return { + browser: match[ 1 ] || "", + version: match[ 2 ] || "0" + }; + }; + jQuery.browser = {}; + jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; +} + +/** + * Small JavaScript module for the documentation. + */ +var Documentation = { + + init : function() { + this.fixFirefoxAnchorBug(); + this.highlightSearchWords(); + this.initIndexTable(); + + }, + + /** + * i18n support + */ + TRANSLATIONS : {}, + PLURAL_EXPR : function(n) { return n == 1 ? 0 : 1; }, + LOCALE : 'unknown', + + // gettext and ngettext don't access this so that the functions + // can safely bound to a different name (_ = Documentation.gettext) + gettext : function(string) { + var translated = Documentation.TRANSLATIONS[string]; + if (typeof translated == 'undefined') + return string; + return (typeof translated == 'string') ? 
translated : translated[0]; + }, + + ngettext : function(singular, plural, n) { + var translated = Documentation.TRANSLATIONS[singular]; + if (typeof translated == 'undefined') + return (n == 1) ? singular : plural; + return translated[Documentation.PLURALEXPR(n)]; + }, + + addTranslations : function(catalog) { + for (var key in catalog.messages) + this.TRANSLATIONS[key] = catalog.messages[key]; + this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')'); + this.LOCALE = catalog.locale; + }, + + /** + * add context elements like header anchor links + */ + addContextElements : function() { + $('div[id] > :header:first').each(function() { + $('\u00B6'). + attr('href', '#' + this.id). + attr('title', _('Permalink to this headline')). + appendTo(this); + }); + $('dt[id]').each(function() { + $('\u00B6'). + attr('href', '#' + this.id). + attr('title', _('Permalink to this definition')). + appendTo(this); + }); + }, + + /** + * workaround a firefox stupidity + * see: https://bugzilla.mozilla.org/show_bug.cgi?id=645075 + */ + fixFirefoxAnchorBug : function() { + if (document.location.hash) + window.setTimeout(function() { + document.location.href += ''; + }, 10); + }, + + /** + * highlight the search words provided in the url in the text + */ + highlightSearchWords : function() { + var params = $.getQueryParameters(); + var terms = (params.highlight) ? params.highlight[0].split(/\s+/) : []; + if (terms.length) { + var body = $('div.body'); + if (!body.length) { + body = $('body'); + } + window.setTimeout(function() { + $.each(terms, function() { + body.highlightText(this.toLowerCase(), 'highlighted'); + }); + }, 10); + $('') + .appendTo($('#searchbox')); + } + }, + + /** + * init the domain index toggle buttons + */ + initIndexTable : function() { + var togglers = $('img.toggler').click(function() { + var src = $(this).attr('src'); + var idnum = $(this).attr('id').substr(7); + $('tr.cg-' + idnum).toggle(); + if (src.substr(-9) == 'minus.png') + $(this).attr('src', src.substr(0, src.length-9) + 'plus.png'); + else + $(this).attr('src', src.substr(0, src.length-8) + 'minus.png'); + }).css('display', ''); + if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) { + togglers.click(); + } + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords : function() { + $('#searchbox .highlight-link').fadeOut(300); + $('span.highlighted').removeClass('highlighted'); + }, + + /** + * make the url absolute + */ + makeURL : function(relativeURL) { + return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL; + }, + + /** + * get the current relative url + */ + getCurrentURL : function() { + var path = document.location.pathname; + var parts = path.split(/\//); + $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() { + if (this == '..') + parts.pop(); + }); + var url = parts.join('/'); + return path.substring(url.lastIndexOf('/') + 1, path.length - 1); + }, + + initOnKeyListeners: function() { + $(document).keyup(function(event) { + var activeElementType = document.activeElement.tagName; + // don't navigate when in search box or textarea + if (activeElementType !== 'TEXTAREA' && activeElementType !== 'INPUT' && activeElementType !== 'SELECT') { + switch (event.keyCode) { + case 37: // left + var prevHref = $('link[rel="prev"]').prop('href'); + if (prevHref) { + window.location.href = prevHref; + return false; + } + case 39: // right + var nextHref = $('link[rel="next"]').prop('href'); + if (nextHref) { + window.location.href = nextHref; + return false; + } + } + } + }); 
+ } +}; + +// quick alias for translations +_ = Documentation.gettext; + +$(document).ready(function() { + Documentation.init(); +}); \ No newline at end of file diff --git a/docs/_build/html/_static/documentation_options.js b/docs/_build/html/_static/documentation_options.js new file mode 100644 index 00000000..8a5f4b08 --- /dev/null +++ b/docs/_build/html/_static/documentation_options.js @@ -0,0 +1,9 @@ +var DOCUMENTATION_OPTIONS = { + URL_ROOT: '', + VERSION: '4.0', + LANGUAGE: 'None', + COLLAPSE_INDEX: false, + FILE_SUFFIX: '.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt' +}; \ No newline at end of file diff --git a/docs/_build/html/_static/down-pressed.png b/docs/_build/html/_static/down-pressed.png new file mode 100644 index 0000000000000000000000000000000000000000..5756c8cad8854722893dc70b9eb4bb0400343a39 GIT binary patch literal 222 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`OFdm2Ln;`PZ^+1>KjR?B@S0W7 z%OS_REiHONoJ6{+Ks@6k3590|7k9F+ddB6!zw3#&!aw#S`x}3V3&=A(a#84O-&F7T z^k3tZB;&iR9siw0|F|E|DAL<8r-F4!1H-;1{e*~yAKZN5f0|Ei6yUmR#Is)EM(Po_ zi`qJR6|P<~+)N+kSDgL7AjdIC_!O7Q?eGb+L+qOjm{~LLinM4NHn7U%HcK%uoMYO5 VJ~8zD2B3o(JYD@<);T3K0RV0%P>BEl literal 0 HcmV?d00001 diff --git a/docs/_build/html/_static/down.png b/docs/_build/html/_static/down.png new file mode 100644 index 0000000000000000000000000000000000000000..1b3bdad2ceffae91cee61b32f3295f9bbe646e48 GIT binary patch literal 202 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!60wlNoGJgf6CVIL!hEy=F?b*7pIY7kW{q%Rg zx!yQ<9v8bmJwa`TQk7YSw}WVQ()mRdQ;TC;* literal 0 HcmV?d00001 diff --git a/docs/_build/html/_static/file.png b/docs/_build/html/_static/file.png new file mode 100644 index 0000000000000000000000000000000000000000..a858a410e4faa62ce324d814e4b816fff83a6fb3 GIT binary patch literal 286 zcmV+(0pb3MP)s`hMrGg#P~ix$^RISR_I47Y|r1 z_CyJOe}D1){SET-^Amu_i71Lt6eYfZjRyw@I6OQAIXXHDfiX^GbOlHe=Ae4>0m)d(f|Me07*qoM6N<$f}vM^LjV8( literal 0 HcmV?d00001 diff --git a/docs/_build/html/_static/jquery-3.1.0.js b/docs/_build/html/_static/jquery-3.1.0.js new file mode 100644 index 00000000..f2fc2747 --- /dev/null +++ b/docs/_build/html/_static/jquery-3.1.0.js @@ -0,0 +1,10074 @@ +/*eslint-disable no-unused-vars*/ +/*! + * jQuery JavaScript Library v3.1.0 + * https://jquery.com/ + * + * Includes Sizzle.js + * https://sizzlejs.com/ + * + * Copyright jQuery Foundation and other contributors + * Released under the MIT license + * https://jquery.org/license + * + * Date: 2016-07-07T21:44Z + */ +( function( global, factory ) { + + "use strict"; + + if ( typeof module === "object" && typeof module.exports === "object" ) { + + // For CommonJS and CommonJS-like environments where a proper `window` + // is present, execute the factory and get jQuery. + // For environments that do not have a `window` with a `document` + // (such as Node.js), expose a factory as module.exports. + // This accentuates the need for the creation of a real `window`. + // e.g. var jQuery = require("jquery")(window); + // See ticket #14549 for more info. + module.exports = global.document ? + factory( global, true ) : + function( w ) { + if ( !w.document ) { + throw new Error( "jQuery requires a window with a document" ); + } + return factory( w ); + }; + } else { + factory( global ); + } + +// Pass this if window is not defined yet +} )( typeof window !== "undefined" ? 
window : this, function( window, noGlobal ) { + +// Edge <= 12 - 13+, Firefox <=18 - 45+, IE 10 - 11, Safari 5.1 - 9+, iOS 6 - 9.1 +// throw exceptions when non-strict code (e.g., ASP.NET 4.5) accesses strict mode +// arguments.callee.caller (trac-13335). But as of jQuery 3.0 (2016), strict mode should be common +// enough that all such attempts are guarded in a try block. +"use strict"; + +var arr = []; + +var document = window.document; + +var getProto = Object.getPrototypeOf; + +var slice = arr.slice; + +var concat = arr.concat; + +var push = arr.push; + +var indexOf = arr.indexOf; + +var class2type = {}; + +var toString = class2type.toString; + +var hasOwn = class2type.hasOwnProperty; + +var fnToString = hasOwn.toString; + +var ObjectFunctionString = fnToString.call( Object ); + +var support = {}; + + + + function DOMEval( code, doc ) { + doc = doc || document; + + var script = doc.createElement( "script" ); + + script.text = code; + doc.head.appendChild( script ).parentNode.removeChild( script ); + } +/* global Symbol */ +// Defining this global in .eslintrc would create a danger of using the global +// unguarded in another place, it seems safer to define global only for this module + + + +var + version = "3.1.0", + + // Define a local copy of jQuery + jQuery = function( selector, context ) { + + // The jQuery object is actually just the init constructor 'enhanced' + // Need init if jQuery is called (just allow error to be thrown if not included) + return new jQuery.fn.init( selector, context ); + }, + + // Support: Android <=4.0 only + // Make sure we trim BOM and NBSP + rtrim = /^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g, + + // Matches dashed string for camelizing + rmsPrefix = /^-ms-/, + rdashAlpha = /-([a-z])/g, + + // Used by jQuery.camelCase as callback to replace() + fcamelCase = function( all, letter ) { + return letter.toUpperCase(); + }; + +jQuery.fn = jQuery.prototype = { + + // The current version of jQuery being used + jquery: version, + + constructor: jQuery, + + // The default length of a jQuery object is 0 + length: 0, + + toArray: function() { + return slice.call( this ); + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + return num != null ? + + // Return just the one element from the set + ( num < 0 ? this[ num + this.length ] : this[ num ] ) : + + // Return all the elements in a clean array + slice.call( this ); + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems ) { + + // Build a new jQuery matched element set + var ret = jQuery.merge( this.constructor(), elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + // Return the newly-formed element set + return ret; + }, + + // Execute a callback for every element in the matched set. + each: function( callback ) { + return jQuery.each( this, callback ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map( this, function( elem, i ) { + return callback.call( elem, i, elem ); + } ) ); + }, + + slice: function() { + return this.pushStack( slice.apply( this, arguments ) ); + }, + + first: function() { + return this.eq( 0 ); + }, + + last: function() { + return this.eq( -1 ); + }, + + eq: function( i ) { + var len = this.length, + j = +i + ( i < 0 ? len : 0 ); + return this.pushStack( j >= 0 && j < len ? 
[ this[ j ] ] : [] ); + }, + + end: function() { + return this.prevObject || this.constructor(); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. + push: push, + sort: arr.sort, + splice: arr.splice +}; + +jQuery.extend = jQuery.fn.extend = function() { + var options, name, src, copy, copyIsArray, clone, + target = arguments[ 0 ] || {}, + i = 1, + length = arguments.length, + deep = false; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + + // Skip the boolean and the target + target = arguments[ i ] || {}; + i++; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !jQuery.isFunction( target ) ) { + target = {}; + } + + // Extend jQuery itself if only one argument is passed + if ( i === length ) { + target = this; + i--; + } + + for ( ; i < length; i++ ) { + + // Only deal with non-null/undefined values + if ( ( options = arguments[ i ] ) != null ) { + + // Extend the base object + for ( name in options ) { + src = target[ name ]; + copy = options[ name ]; + + // Prevent never-ending loop + if ( target === copy ) { + continue; + } + + // Recurse if we're merging plain objects or arrays + if ( deep && copy && ( jQuery.isPlainObject( copy ) || + ( copyIsArray = jQuery.isArray( copy ) ) ) ) { + + if ( copyIsArray ) { + copyIsArray = false; + clone = src && jQuery.isArray( src ) ? src : []; + + } else { + clone = src && jQuery.isPlainObject( src ) ? src : {}; + } + + // Never move original objects, clone them + target[ name ] = jQuery.extend( deep, clone, copy ); + + // Don't bring in undefined values + } else if ( copy !== undefined ) { + target[ name ] = copy; + } + } + } + } + + // Return the modified object + return target; +}; + +jQuery.extend( { + + // Unique for each copy of jQuery on the page + expando: "jQuery" + ( version + Math.random() ).replace( /\D/g, "" ), + + // Assume jQuery is ready without the ready module + isReady: true, + + error: function( msg ) { + throw new Error( msg ); + }, + + noop: function() {}, + + isFunction: function( obj ) { + return jQuery.type( obj ) === "function"; + }, + + isArray: Array.isArray, + + isWindow: function( obj ) { + return obj != null && obj === obj.window; + }, + + isNumeric: function( obj ) { + + // As of jQuery 3.0, isNumeric is limited to + // strings and numbers (primitives or objects) + // that can be coerced to finite numbers (gh-2662) + var type = jQuery.type( obj ); + return ( type === "number" || type === "string" ) && + + // parseFloat NaNs numeric-cast false positives ("") + // ...but misinterprets leading-number strings, particularly hex literals ("0x...") + // subtraction forces infinities to NaN + !isNaN( obj - parseFloat( obj ) ); + }, + + isPlainObject: function( obj ) { + var proto, Ctor; + + // Detect obvious negatives + // Use toString instead of jQuery.type to catch host objects + if ( !obj || toString.call( obj ) !== "[object Object]" ) { + return false; + } + + proto = getProto( obj ); + + // Objects with no prototype (e.g., `Object.create( null )`) are plain + if ( !proto ) { + return true; + } + + // Objects with prototype are plain iff they were constructed by a global Object function + Ctor = hasOwn.call( proto, "constructor" ) && proto.constructor; + return typeof Ctor === "function" && fnToString.call( Ctor ) === ObjectFunctionString; + }, + + isEmptyObject: function( obj ) { + + /* eslint-disable no-unused-vars */ + // See 
https://github.com/eslint/eslint/issues/6125 + var name; + + for ( name in obj ) { + return false; + } + return true; + }, + + type: function( obj ) { + if ( obj == null ) { + return obj + ""; + } + + // Support: Android <=2.3 only (functionish RegExp) + return typeof obj === "object" || typeof obj === "function" ? + class2type[ toString.call( obj ) ] || "object" : + typeof obj; + }, + + // Evaluates a script in a global context + globalEval: function( code ) { + DOMEval( code ); + }, + + // Convert dashed to camelCase; used by the css and data modules + // Support: IE <=9 - 11, Edge 12 - 13 + // Microsoft forgot to hump their vendor prefix (#9572) + camelCase: function( string ) { + return string.replace( rmsPrefix, "ms-" ).replace( rdashAlpha, fcamelCase ); + }, + + nodeName: function( elem, name ) { + return elem.nodeName && elem.nodeName.toLowerCase() === name.toLowerCase(); + }, + + each: function( obj, callback ) { + var length, i = 0; + + if ( isArrayLike( obj ) ) { + length = obj.length; + for ( ; i < length; i++ ) { + if ( callback.call( obj[ i ], i, obj[ i ] ) === false ) { + break; + } + } + } else { + for ( i in obj ) { + if ( callback.call( obj[ i ], i, obj[ i ] ) === false ) { + break; + } + } + } + + return obj; + }, + + // Support: Android <=4.0 only + trim: function( text ) { + return text == null ? + "" : + ( text + "" ).replace( rtrim, "" ); + }, + + // results is for internal usage only + makeArray: function( arr, results ) { + var ret = results || []; + + if ( arr != null ) { + if ( isArrayLike( Object( arr ) ) ) { + jQuery.merge( ret, + typeof arr === "string" ? + [ arr ] : arr + ); + } else { + push.call( ret, arr ); + } + } + + return ret; + }, + + inArray: function( elem, arr, i ) { + return arr == null ? -1 : indexOf.call( arr, elem, i ); + }, + + // Support: Android <=4.0 only, PhantomJS 1 only + // push.apply(_, arraylike) throws on ancient WebKit + merge: function( first, second ) { + var len = +second.length, + j = 0, + i = first.length; + + for ( ; j < len; j++ ) { + first[ i++ ] = second[ j ]; + } + + first.length = i; + + return first; + }, + + grep: function( elems, callback, invert ) { + var callbackInverse, + matches = [], + i = 0, + length = elems.length, + callbackExpect = !invert; + + // Go through the array, only saving the items + // that pass the validator function + for ( ; i < length; i++ ) { + callbackInverse = !callback( elems[ i ], i ); + if ( callbackInverse !== callbackExpect ) { + matches.push( elems[ i ] ); + } + } + + return matches; + }, + + // arg is for internal usage only + map: function( elems, callback, arg ) { + var length, value, + i = 0, + ret = []; + + // Go through the array, translating each of the items to their new values + if ( isArrayLike( elems ) ) { + length = elems.length; + for ( ; i < length; i++ ) { + value = callback( elems[ i ], i, arg ); + + if ( value != null ) { + ret.push( value ); + } + } + + // Go through every key on the object, + } else { + for ( i in elems ) { + value = callback( elems[ i ], i, arg ); + + if ( value != null ) { + ret.push( value ); + } + } + } + + // Flatten any nested arrays + return concat.apply( [], ret ); + }, + + // A global GUID counter for objects + guid: 1, + + // Bind a function to a context, optionally partially applying any + // arguments. 
+ proxy: function( fn, context ) { + var tmp, args, proxy; + + if ( typeof context === "string" ) { + tmp = fn[ context ]; + context = fn; + fn = tmp; + } + + // Quick check to determine if target is callable, in the spec + // this throws a TypeError, but we will just return undefined. + if ( !jQuery.isFunction( fn ) ) { + return undefined; + } + + // Simulated bind + args = slice.call( arguments, 2 ); + proxy = function() { + return fn.apply( context || this, args.concat( slice.call( arguments ) ) ); + }; + + // Set the guid of unique handler to the same of original handler, so it can be removed + proxy.guid = fn.guid = fn.guid || jQuery.guid++; + + return proxy; + }, + + now: Date.now, + + // jQuery.support is not used in Core but other projects attach their + // properties to it so it needs to exist. + support: support +} ); + +if ( typeof Symbol === "function" ) { + jQuery.fn[ Symbol.iterator ] = arr[ Symbol.iterator ]; +} + +// Populate the class2type map +jQuery.each( "Boolean Number String Function Array Date RegExp Object Error Symbol".split( " " ), +function( i, name ) { + class2type[ "[object " + name + "]" ] = name.toLowerCase(); +} ); + +function isArrayLike( obj ) { + + // Support: real iOS 8.2 only (not reproducible in simulator) + // `in` check used to prevent JIT error (gh-2145) + // hasOwn isn't used here due to false negatives + // regarding Nodelist length in IE + var length = !!obj && "length" in obj && obj.length, + type = jQuery.type( obj ); + + if ( type === "function" || jQuery.isWindow( obj ) ) { + return false; + } + + return type === "array" || length === 0 || + typeof length === "number" && length > 0 && ( length - 1 ) in obj; +} +var Sizzle = +/*! + * Sizzle CSS Selector Engine v2.3.0 + * https://sizzlejs.com/ + * + * Copyright jQuery Foundation and other contributors + * Released under the MIT license + * http://jquery.org/license + * + * Date: 2016-01-04 + */ +(function( window ) { + +var i, + support, + Expr, + getText, + isXML, + tokenize, + compile, + select, + outermostContext, + sortInput, + hasDuplicate, + + // Local document vars + setDocument, + document, + docElem, + documentIsHTML, + rbuggyQSA, + rbuggyMatches, + matches, + contains, + + // Instance-specific data + expando = "sizzle" + 1 * new Date(), + preferredDoc = window.document, + dirruns = 0, + done = 0, + classCache = createCache(), + tokenCache = createCache(), + compilerCache = createCache(), + sortOrder = function( a, b ) { + if ( a === b ) { + hasDuplicate = true; + } + return 0; + }, + + // Instance methods + hasOwn = ({}).hasOwnProperty, + arr = [], + pop = arr.pop, + push_native = arr.push, + push = arr.push, + slice = arr.slice, + // Use a stripped-down indexOf as it's faster than native + // https://jsperf.com/thor-indexof-vs-for/5 + indexOf = function( list, elem ) { + var i = 0, + len = list.length; + for ( ; i < len; i++ ) { + if ( list[i] === elem ) { + return i; + } + } + return -1; + }, + + booleans = "checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|ismap|loop|multiple|open|readonly|required|scoped", + + // Regular expressions + + // http://www.w3.org/TR/css3-selectors/#whitespace + whitespace = "[\\x20\\t\\r\\n\\f]", + + // http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier + identifier = "(?:\\\\.|[\\w-]|[^\0-\\xa0])+", + + // Attribute selectors: http://www.w3.org/TR/selectors/#attribute-selectors + attributes = "\\[" + whitespace + "*(" + identifier + ")(?:" + whitespace + + // Operator (capture 2) + "*([*^$|!~]?=)" + whitespace + + // 
"Attribute values must be CSS identifiers [capture 5] or strings [capture 3 or capture 4]" + "*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|(" + identifier + "))|)" + whitespace + + "*\\]", + + pseudos = ":(" + identifier + ")(?:\\((" + + // To reduce the number of selectors needing tokenize in the preFilter, prefer arguments: + // 1. quoted (capture 3; capture 4 or capture 5) + "('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|" + + // 2. simple (capture 6) + "((?:\\\\.|[^\\\\()[\\]]|" + attributes + ")*)|" + + // 3. anything else (capture 2) + ".*" + + ")\\)|)", + + // Leading and non-escaped trailing whitespace, capturing some non-whitespace characters preceding the latter + rwhitespace = new RegExp( whitespace + "+", "g" ), + rtrim = new RegExp( "^" + whitespace + "+|((?:^|[^\\\\])(?:\\\\.)*)" + whitespace + "+$", "g" ), + + rcomma = new RegExp( "^" + whitespace + "*," + whitespace + "*" ), + rcombinators = new RegExp( "^" + whitespace + "*([>+~]|" + whitespace + ")" + whitespace + "*" ), + + rattributeQuotes = new RegExp( "=" + whitespace + "*([^\\]'\"]*?)" + whitespace + "*\\]", "g" ), + + rpseudo = new RegExp( pseudos ), + ridentifier = new RegExp( "^" + identifier + "$" ), + + matchExpr = { + "ID": new RegExp( "^#(" + identifier + ")" ), + "CLASS": new RegExp( "^\\.(" + identifier + ")" ), + "TAG": new RegExp( "^(" + identifier + "|[*])" ), + "ATTR": new RegExp( "^" + attributes ), + "PSEUDO": new RegExp( "^" + pseudos ), + "CHILD": new RegExp( "^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\(" + whitespace + + "*(even|odd|(([+-]|)(\\d*)n|)" + whitespace + "*(?:([+-]|)" + whitespace + + "*(\\d+)|))" + whitespace + "*\\)|)", "i" ), + "bool": new RegExp( "^(?:" + booleans + ")$", "i" ), + // For use in libraries implementing .is() + // We use this for POS matching in `select` + "needsContext": new RegExp( "^" + whitespace + "*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\(" + + whitespace + "*((?:-\\d)?\\d*)" + whitespace + "*\\)|)(?=[^-]|$)", "i" ) + }, + + rinputs = /^(?:input|select|textarea|button)$/i, + rheader = /^h\d$/i, + + rnative = /^[^{]+\{\s*\[native \w/, + + // Easily-parseable/retrievable ID or TAG or CLASS selectors + rquickExpr = /^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/, + + rsibling = /[+~]/, + + // CSS escapes + // http://www.w3.org/TR/CSS21/syndata.html#escaped-characters + runescape = new RegExp( "\\\\([\\da-f]{1,6}" + whitespace + "?|(" + whitespace + ")|.)", "ig" ), + funescape = function( _, escaped, escapedWhitespace ) { + var high = "0x" + escaped - 0x10000; + // NaN means non-codepoint + // Support: Firefox<24 + // Workaround erroneous numeric interpretation of +"0x" + return high !== high || escapedWhitespace ? + escaped : + high < 0 ? 
+ // BMP codepoint + String.fromCharCode( high + 0x10000 ) : + // Supplemental Plane codepoint (surrogate pair) + String.fromCharCode( high >> 10 | 0xD800, high & 0x3FF | 0xDC00 ); + }, + + // CSS string/identifier serialization + // https://drafts.csswg.org/cssom/#common-serializing-idioms + rcssescape = /([\0-\x1f\x7f]|^-?\d)|^-$|[^\x80-\uFFFF\w-]/g, + fcssescape = function( ch, asCodePoint ) { + if ( asCodePoint ) { + + // U+0000 NULL becomes U+FFFD REPLACEMENT CHARACTER + if ( ch === "\0" ) { + return "\uFFFD"; + } + + // Control characters and (dependent upon position) numbers get escaped as code points + return ch.slice( 0, -1 ) + "\\" + ch.charCodeAt( ch.length - 1 ).toString( 16 ) + " "; + } + + // Other potentially-special ASCII characters get backslash-escaped + return "\\" + ch; + }, + + // Used for iframes + // See setDocument() + // Removing the function wrapper causes a "Permission Denied" + // error in IE + unloadHandler = function() { + setDocument(); + }, + + disabledAncestor = addCombinator( + function( elem ) { + return elem.disabled === true; + }, + { dir: "parentNode", next: "legend" } + ); + +// Optimize for push.apply( _, NodeList ) +try { + push.apply( + (arr = slice.call( preferredDoc.childNodes )), + preferredDoc.childNodes + ); + // Support: Android<4.0 + // Detect silently failing push.apply + arr[ preferredDoc.childNodes.length ].nodeType; +} catch ( e ) { + push = { apply: arr.length ? + + // Leverage slice if possible + function( target, els ) { + push_native.apply( target, slice.call(els) ); + } : + + // Support: IE<9 + // Otherwise append directly + function( target, els ) { + var j = target.length, + i = 0; + // Can't trust NodeList.length + while ( (target[j++] = els[i++]) ) {} + target.length = j - 1; + } + }; +} + +function Sizzle( selector, context, results, seed ) { + var m, i, elem, nid, match, groups, newSelector, + newContext = context && context.ownerDocument, + + // nodeType defaults to 9, since context defaults to document + nodeType = context ? context.nodeType : 9; + + results = results || []; + + // Return early from calls with invalid selector or context + if ( typeof selector !== "string" || !selector || + nodeType !== 1 && nodeType !== 9 && nodeType !== 11 ) { + + return results; + } + + // Try to shortcut find operations (as opposed to filters) in HTML documents + if ( !seed ) { + + if ( ( context ? 
context.ownerDocument || context : preferredDoc ) !== document ) { + setDocument( context ); + } + context = context || document; + + if ( documentIsHTML ) { + + // If the selector is sufficiently simple, try using a "get*By*" DOM method + // (excepting DocumentFragment context, where the methods don't exist) + if ( nodeType !== 11 && (match = rquickExpr.exec( selector )) ) { + + // ID selector + if ( (m = match[1]) ) { + + // Document context + if ( nodeType === 9 ) { + if ( (elem = context.getElementById( m )) ) { + + // Support: IE, Opera, Webkit + // TODO: identify versions + // getElementById can match elements by name instead of ID + if ( elem.id === m ) { + results.push( elem ); + return results; + } + } else { + return results; + } + + // Element context + } else { + + // Support: IE, Opera, Webkit + // TODO: identify versions + // getElementById can match elements by name instead of ID + if ( newContext && (elem = newContext.getElementById( m )) && + contains( context, elem ) && + elem.id === m ) { + + results.push( elem ); + return results; + } + } + + // Type selector + } else if ( match[2] ) { + push.apply( results, context.getElementsByTagName( selector ) ); + return results; + + // Class selector + } else if ( (m = match[3]) && support.getElementsByClassName && + context.getElementsByClassName ) { + + push.apply( results, context.getElementsByClassName( m ) ); + return results; + } + } + + // Take advantage of querySelectorAll + if ( support.qsa && + !compilerCache[ selector + " " ] && + (!rbuggyQSA || !rbuggyQSA.test( selector )) ) { + + if ( nodeType !== 1 ) { + newContext = context; + newSelector = selector; + + // qSA looks outside Element context, which is not what we want + // Thanks to Andrew Dupont for this workaround technique + // Support: IE <=8 + // Exclude object elements + } else if ( context.nodeName.toLowerCase() !== "object" ) { + + // Capture the context ID, setting it first if necessary + if ( (nid = context.getAttribute( "id" )) ) { + nid = nid.replace( rcssescape, fcssescape ); + } else { + context.setAttribute( "id", (nid = expando) ); + } + + // Prefix every selector in the list + groups = tokenize( selector ); + i = groups.length; + while ( i-- ) { + groups[i] = "#" + nid + " " + toSelector( groups[i] ); + } + newSelector = groups.join( "," ); + + // Expand context for sibling selectors + newContext = rsibling.test( selector ) && testContext( context.parentNode ) || + context; + } + + if ( newSelector ) { + try { + push.apply( results, + newContext.querySelectorAll( newSelector ) + ); + return results; + } catch ( qsaError ) { + } finally { + if ( nid === expando ) { + context.removeAttribute( "id" ); + } + } + } + } + } + } + + // All others + return select( selector.replace( rtrim, "$1" ), context, results, seed ); +} + +/** + * Create key-value caches of limited size + * @returns {function(string, object)} Returns the Object data after storing it on itself with + * property name the (space-suffixed) string and (if the cache is larger than Expr.cacheLength) + * deleting the oldest entry + */ +function createCache() { + var keys = []; + + function cache( key, value ) { + // Use (key + " ") to avoid collision with native prototype properties (see Issue #157) + if ( keys.push( key + " " ) > Expr.cacheLength ) { + // Only keep the most recent entries + delete cache[ keys.shift() ]; + } + return (cache[ key + " " ] = value); + } + return cache; +} + +/** + * Mark a function for special use by Sizzle + * @param {Function} fn The function to mark + */ 
+function markFunction( fn ) { + fn[ expando ] = true; + return fn; +} + +/** + * Support testing using an element + * @param {Function} fn Passed the created element and returns a boolean result + */ +function assert( fn ) { + var el = document.createElement("fieldset"); + + try { + return !!fn( el ); + } catch (e) { + return false; + } finally { + // Remove from its parent by default + if ( el.parentNode ) { + el.parentNode.removeChild( el ); + } + // release memory in IE + el = null; + } +} + +/** + * Adds the same handler for all of the specified attrs + * @param {String} attrs Pipe-separated list of attributes + * @param {Function} handler The method that will be applied + */ +function addHandle( attrs, handler ) { + var arr = attrs.split("|"), + i = arr.length; + + while ( i-- ) { + Expr.attrHandle[ arr[i] ] = handler; + } +} + +/** + * Checks document order of two siblings + * @param {Element} a + * @param {Element} b + * @returns {Number} Returns less than 0 if a precedes b, greater than 0 if a follows b + */ +function siblingCheck( a, b ) { + var cur = b && a, + diff = cur && a.nodeType === 1 && b.nodeType === 1 && + a.sourceIndex - b.sourceIndex; + + // Use IE sourceIndex if available on both nodes + if ( diff ) { + return diff; + } + + // Check if b follows a + if ( cur ) { + while ( (cur = cur.nextSibling) ) { + if ( cur === b ) { + return -1; + } + } + } + + return a ? 1 : -1; +} + +/** + * Returns a function to use in pseudos for input types + * @param {String} type + */ +function createInputPseudo( type ) { + return function( elem ) { + var name = elem.nodeName.toLowerCase(); + return name === "input" && elem.type === type; + }; +} + +/** + * Returns a function to use in pseudos for buttons + * @param {String} type + */ +function createButtonPseudo( type ) { + return function( elem ) { + var name = elem.nodeName.toLowerCase(); + return (name === "input" || name === "button") && elem.type === type; + }; +} + +/** + * Returns a function to use in pseudos for :enabled/:disabled + * @param {Boolean} disabled true for :disabled; false for :enabled + */ +function createDisabledPseudo( disabled ) { + // Known :disabled false positives: + // IE: *[disabled]:not(button, input, select, textarea, optgroup, option, menuitem, fieldset) + // not IE: fieldset[disabled] > legend:nth-of-type(n+2) :can-disable + return function( elem ) { + + // Check form elements and option elements for explicit disabling + return "label" in elem && elem.disabled === disabled || + "form" in elem && elem.disabled === disabled || + + // Check non-disabled form elements for fieldset[disabled] ancestors + "form" in elem && elem.disabled === false && ( + // Support: IE6-11+ + // Ancestry is covered for us + elem.isDisabled === disabled || + + // Otherwise, assume any non-