Add prediction_model parameter to ICC sample function
Previously, only the 'exact' model could be used in the sample-wise ICC function. Now, a "prediction_model" parameter, analogous to the one in the population-based ICC function, allows passing a pre-defined prediction model.

Signed-off-by: Patrick Bloebaum <bloebp@amazon.com>
bloebp committed Oct 13, 2023
1 parent 7eb4a0c commit 86fcb28
Showing 2 changed files with 88 additions and 15 deletions.
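
For context, a minimal usage sketch of the updated function (not part of the commit; the chain graph, the synthetic data, and the auto-assignment/fit steps below are illustrative assumptions):

import networkx as nx
import numpy as np
import pandas as pd

from dowhy import gcm
from dowhy.gcm.influence import intrinsic_causal_influence_sample

# Hypothetical data for a simple chain X0 -> X1 -> X2.
rng = np.random.default_rng(0)
X0 = rng.normal(size=1000)
X1 = 2 * X0 + rng.normal(size=1000)
X2 = X1 + rng.normal(size=1000)
data = pd.DataFrame({"X0": X0, "X1": X1, "X2": X2})

# Fit an invertible SCM; auto-assignment picks invertible mechanisms (e.g., additive noise models).
causal_model = gcm.InvertibleStructuralCausalModel(nx.DiGraph([("X0", "X1"), ("X1", "X2")]))
gcm.auto.assign_causal_mechanisms(causal_model, data)
gcm.fit(causal_model, data)

# New in this commit: prediction_model can be 'approx' (default), 'exact', or a PredictionModel
# instance, mirroring the population-based intrinsic_causal_influence function.
contributions = intrinsic_causal_influence_sample(
    causal_model,
    "X2",
    baseline_samples=data.head(3),
    prediction_model="exact",
)
print(contributions[0])  # {('X0', 'X2'): ..., ('X1', 'X2'): ..., ('X2', 'X2'): ...}
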
53 changes: 39 additions & 14 deletions dowhy/gcm/influence.py
@@ -243,10 +243,12 @@ def intrinsic_causal_influence(
:param prediction_model: Prediction model for estimating the functional relationship between subsets of ancestor
noise terms and the target node. This can be an instance of a PredictionModel, the string
'approx' or the string 'exact'. With 'exact', the underlying causal models in the graph
are utilized directly by propagating given noise inputs through the graph. This is
generally more accurate but slow. With 'approx', an appropriate model is selected and
trained based on sampled data from the graph, which is less accurate but faster. A more
detailed treatment on why we need this parameter is also provided in :ref:`icc`.
are utilized directly by propagating given noise inputs through the graph, which ensures
that generated samples follow the fitted models. In contrast, the 'approx' method involves
selecting and training a suitable model based on data sampled from the graph. This might
lead to deviations from the outcomes of the fitted models, but is faster and can be more
robust in certain settings. A more detailed treatment on why we need this parameter is
also provided in :ref:`icc`.
:param attribution_func: Optional attribution function to measure the statistical property of the target node. This
function expects two inputs; predictions after the randomization of certain features (i.e.
samples from noise nodes) and a baseline where no features were randomized. The baseline
@@ -325,9 +327,11 @@ def intrinsic_causal_influence_sample(
target_node: Any,
baseline_samples: pd.DataFrame,
noise_feature_samples: Optional[pd.DataFrame] = None,
prediction_model: Union[PredictionModel, ClassificationModel, str] = "approx",
subset_scoring_func: Optional[Callable[[np.ndarray, np.ndarray], Union[np.ndarray, float]]] = None,
num_noise_feature_samples: int = 5000,
max_batch_size: int = 100,
auto_assign_quality: auto.AssignmentQuality = auto.AssignmentQuality.GOOD,
shapley_config: Optional[ShapleyConfig] = None,
) -> List[Dict[Any, Any]]:
"""Estimates the intrinsic causal impact of upstream nodes on a specified target_node, using the provided
@@ -342,9 +346,18 @@
:param causal_model: The fitted invertible structural causal model.
:param target_node: Node of interest.
:param baseline_samples: Samples for which the influence should be estimated.
:param noise_feature_samples: Optional noise samples of upstream nodes used as 'background' samples.. If None is
:param noise_feature_samples: Optional noise samples of upstream nodes used as 'background' samples. If None is
given, new noise samples are generated based on the graph. These samples are used for
randomizing features that are not in the subset.
:param prediction_model: Prediction model for estimating the functional relationship between subsets of ancestor
noise terms and the target node. This can be an instance of a PredictionModel, the string
'approx' or the string 'exact'. With 'exact', the underlying causal models in the graph
are utilized directly by propagating given noise inputs through the graph, which ensures
that generated samples follow the fitted models. In contrast, the 'approx' method involves
selecting and training a suitable model based on data sampled from the graph. This might
lead to deviations from the outcomes of the fitted models, but is faster and can be more
robust in certain settings. A more detailed treatment on why we need this parameter is
also provided in :ref:`icc`.
:param subset_scoring_func: Set function for estimating the quantity of interest. This function
expects two inputs; the outcome of the model for some samples if certain features are permuted and the
outcome of the model for the same samples when no features were permuted. By default,
@@ -353,6 +366,7 @@
This parameter indicates how many.
:param max_batch_size: Maximum batch size for estimating multiple predictions at once. This has a significant influence on the
overall memory usage. If set to -1, all samples are used in one batch.
:param auto_assign_quality: Auto assign quality for the 'approx' prediction_model option.
:param shapley_config: :class:`~dowhy.gcm.shapley.ShapleyConfig` for the Shapley estimator.
:return: A list of dictionaries indicating the intrinsic causal influence of a node on the target for a particular
sample. This is, each dictionary belongs to one baseline sample.
@@ -376,21 +390,32 @@ def intrinsic_causal_influence_sample(
if subset_scoring_func is None:
subset_scoring_func = means_difference

target_samples = feature_samples[target_node].to_numpy()
node_names = noise_feature_samples.columns
noise_feature_samples, target_samples = shape_into_2d(noise_feature_samples.to_numpy(), target_samples)

prediction_method = _get_icc_noise_function(
causal_model,
target_node,
prediction_model,
noise_feature_samples,
node_names,
target_samples,
auto_assign_quality,
False,  # Currently only supports a continuous target since we need to reconstruct its noise term.
)

shapley_vales = feature_relevance_sample(
_get_icc_noise_function(
causal_model, target_node, "exact", noise_feature_samples, noise_feature_samples.columns, None, None, False
),
feature_samples=noise_feature_samples.to_numpy(),
baseline_samples=compute_noise_from_data(causal_model, baseline_samples)[
noise_feature_samples.columns
].to_numpy(),
prediction_method,
feature_samples=noise_feature_samples,
baseline_samples=compute_noise_from_data(causal_model, baseline_samples)[node_names].to_numpy(),
subset_scoring_func=subset_scoring_func,
max_batch_size=max_batch_size,
shapley_config=shapley_config,
)

return [
{(predecessor, target_node): shapley_vales[i][q] for q, predecessor in enumerate(noise_feature_samples.columns)}
{(predecessor, target_node): shapley_vales[i][q] for q, predecessor in enumerate(node_names)}
for i in range(shapley_vales.shape[0])
]

@@ -432,7 +457,7 @@ def icc_set_function(subset: np.ndarray) -> Union[np.ndarray, float]:


def _get_icc_noise_function(
causal_model: InvertibleStructuralCausalModel,
causal_model: StructuralCausalModel,
target_node: Any,
prediction_model: Union[PredictionModel, ClassificationModel, str],
noise_samples: np.ndarray,
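To make the 'exact' vs. 'approx' distinction from the docstring above concrete, a hedged sketch of the three ways the new parameter can be set (assuming the fitted causal_model and data from the sketch following the commit summary; create_linear_regressor is the helper used by the new test):

from dowhy.gcm.influence import intrinsic_causal_influence_sample
from dowhy.gcm.ml import create_linear_regressor

baseline_samples = data.head(3)

# 'exact': propagate noise samples through the fitted mechanisms (faithful to the fitted models, slower).
icc_exact = intrinsic_causal_influence_sample(causal_model, "X2", baseline_samples, prediction_model="exact")

# 'approx' (default): select and train a surrogate on data sampled from the graph, controlled by
# auto_assign_quality (faster, may deviate slightly from the fitted models).
icc_approx = intrinsic_causal_influence_sample(causal_model, "X2", baseline_samples, prediction_model="approx")

# Pre-defined model: pass any PredictionModel instance directly.
icc_linear = intrinsic_causal_influence_sample(
    causal_model, "X2", baseline_samples, prediction_model=create_linear_regressor()
)
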
50 changes: 49 additions & 1 deletion tests/gcm/test_intrinsic_influence.py
@@ -17,7 +17,12 @@
)
from dowhy.gcm._noise import noise_samples_of_ancestors
from dowhy.gcm.influence import intrinsic_causal_influence_sample
from dowhy.gcm.ml import create_hist_gradient_boost_classifier, create_linear_regressor_with_given_parameters
from dowhy.gcm.ml import (
create_hist_gradient_boost_classifier,
create_hist_gradient_boost_regressor,
create_linear_regressor,
create_linear_regressor_with_given_parameters,
)
from dowhy.gcm.uncertainty import estimate_entropy_of_probabilities, estimate_variance
from dowhy.gcm.util.general import apply_one_hot_encoding, fit_one_hot_encoders
from dowhy.graph import node_connected_subgraph_view
@@ -247,3 +252,46 @@ def test_given_linear_gaussian_data_when_estimate_sample_wise_intrinsic_causal_i
assert shapley_values[1][("X1", "X3")] == approx(0.5, abs=0.1)
assert shapley_values[1][("X2", "X3")] == approx(2, abs=0.1)
assert shapley_values[1][("X3", "X3")] == approx(1, abs=0.1)


@flaky(max_runs=3)
def test_given_linear_gaussian_data_when_estimate_sample_wise_intrinsic_causal_influence_with_a_pre_defined_model_then_returns_expected_values():
causal_model = InvertibleStructuralCausalModel(nx.DiGraph([("X0", "X1"), ("X1", "X2"), ("X2", "X3")]))

causal_model.set_causal_mechanism("X0", ScipyDistribution(stats.norm, loc=0, scale=1))
causal_model.set_causal_mechanism(
"X1",
AdditiveNoiseModel(
create_linear_regressor_with_given_parameters(np.array([2])), ScipyDistribution(stats.norm, loc=0, scale=1)
),
)
causal_model.set_causal_mechanism(
"X2",
AdditiveNoiseModel(
create_linear_regressor_with_given_parameters(np.array([1])), ScipyDistribution(stats.norm, loc=0, scale=1)
),
)
causal_model.set_causal_mechanism(
"X3",
AdditiveNoiseModel(
create_linear_regressor_with_given_parameters(np.array([1])), ScipyDistribution(stats.norm, loc=0, scale=1)
),
)
_persist_parents(causal_model.graph)

shapley_values = intrinsic_causal_influence_sample(
causal_model,
"X3",
pd.DataFrame({"X0": [0, 1], "X1": [0.5, 2.5], "X2": [1.5, 4.5], "X3": [1.5, 5.5]}),
prediction_model=create_linear_regressor(),
)

assert shapley_values[0][("X0", "X3")] == approx(0, abs=0.15)
assert shapley_values[0][("X1", "X3")] == approx(0.5, abs=0.15)
assert shapley_values[0][("X2", "X3")] == approx(1, abs=0.15)
assert shapley_values[0][("X3", "X3")] == approx(0, abs=0.15)

assert shapley_values[1][("X0", "X3")] == approx(2, abs=0.15)
assert shapley_values[1][("X1", "X3")] == approx(0.5, abs=0.15)
assert shapley_values[1][("X2", "X3")] == approx(2, abs=0.15)
assert shapley_values[1][("X3", "X3")] == approx(1, abs=0.15)
