@@ -243,10 +243,12 @@ def intrinsic_causal_influence(
243
243
:param prediction_model: Prediction model for estimating the functional relationship between subsets of ancestor
244
244
noise terms and the target node. This can be an instance of a PredictionModel, the string
245
245
'approx' or the string 'exact'. With 'exact', the underlying causal models in the graph
246
- are utilized directly by propagating given noise inputs through the graph. This is
247
- generally more accurate but slow. With 'approx', an appropriate model is selected and
248
- trained based on sampled data from the graph, which is less accurate but faster. A more
249
- detailed treatment on why we need this parameter is also provided in :ref:`icc`.
246
+ are utilized directly by propagating given noise inputs through the graph, which ensures
247
+ that generated samples follow the fitted models. In contrast, the 'approx' method involves
248
+ selecting and training a suitable model based on data sampled from the graph. This might
249
+ lead to deviations from the outcomes of the fitted models, but is faster and can be more
250
+ robust in certain settings. A more detailed treatment on why we need this parameter is
251
+ also provided in :ref:`icc`.
250
252
:param attribution_func: Optional attribution function to measure the statistical property of the target node. This
251
253
function expects two inputs; predictions after the randomization of certain features (i.e.
252
254
samples from noise nodes) and a baseline where no features were randomized. The baseline
@@ -325,9 +327,11 @@ def intrinsic_causal_influence_sample(
325
327
target_node : Any ,
326
328
baseline_samples : pd .DataFrame ,
327
329
noise_feature_samples : Optional [pd .DataFrame ] = None ,
330
+ prediction_model : Union [PredictionModel , ClassificationModel , str ] = "approx" ,
328
331
subset_scoring_func : Optional [Callable [[np .ndarray , np .ndarray ], Union [np .ndarray , float ]]] = None ,
329
332
num_noise_feature_samples : int = 5000 ,
330
333
max_batch_size : int = 100 ,
334
+ auto_assign_quality : auto .AssignmentQuality = auto .AssignmentQuality .GOOD ,
331
335
shapley_config : Optional [ShapleyConfig ] = None ,
332
336
) -> List [Dict [Any , Any ]]:
333
337
"""Estimates the intrinsic causal impact of upstream nodes on a specified target_node, using the provided
@@ -342,9 +346,18 @@ def intrinsic_causal_influence_sample(
342
346
:param causal_model: The fitted invertible structural causal model.
343
347
:param target_node: Node of interest.
344
348
:param baseline_samples: Samples for which the influence should be estimated.
345
- :param noise_feature_samples: Optional noise samples of upstream nodes used as 'background' samples.. If None is
349
+ :param noise_feature_samples: Optional noise samples of upstream nodes used as 'background' samples. If None is
346
350
given, new noise samples are generated based on the graph. These samples are used for
347
351
randomizing features that are not in the subset.
352
+ :param prediction_model: Prediction model for estimating the functional relationship between subsets of ancestor
353
+ noise terms and the target node. This can be an instance of a PredictionModel, the string
354
+ 'approx' or the string 'exact'. With 'exact', the underlying causal models in the graph
355
+ are utilized directly by propagating given noise inputs through the graph, which ensures
356
+ that generated samples follow the fitted models. In contrast, the 'approx' method involves
357
+ selecting and training a suitable model based on data sampled from the graph. This might
358
+ lead to deviations from the outcomes of the fitted models, but is faster and can be more
359
+ robust in certain settings. A more detailed treatment on why we need this parameter is
360
+ also provided in :ref:`icc`.
348
361
:param subset_scoring_func: Set function for estimating the quantity of interest. This function
349
362
expects two inputs; the outcome of the model for some samples if certain features are permuted and the
350
363
outcome of the model for the same samples when no features were permuted. By default,
@@ -353,6 +366,7 @@ def intrinsic_causal_influence_sample(
353
366
This parameter indicates how many.
354
367
:param max_batch_size: Maximum batch size for estimating multiple predictions at once. This has a significant influence on the
355
368
overall memory usage. If set to -1, all samples are used in one batch.
369
+ :param auto_assign_quality: Auto assign quality for the 'approx' prediction_model option.
356
370
:param shapley_config: :class:`~dowhy.gcm.shapley.ShapleyConfig` for the Shapley estimator.
357
371
:return: A list of dictionaries indicating the intrinsic causal influence of a node on the target for a particular
358
372
sample. That is, each dictionary belongs to one baseline sample.
@@ -376,21 +390,32 @@ def intrinsic_causal_influence_sample(
376
390
if subset_scoring_func is None :
377
391
subset_scoring_func = means_difference
378
392
393
+ target_samples = feature_samples [target_node ].to_numpy ()
394
+ node_names = noise_feature_samples .columns
395
+ noise_feature_samples , target_samples = shape_into_2d (noise_feature_samples .to_numpy (), target_samples )
396
+
397
+ prediction_method = _get_icc_noise_function (
398
+ causal_model ,
399
+ target_node ,
400
+ prediction_model ,
401
+ noise_feature_samples ,
402
+ node_names ,
403
+ target_samples ,
404
+ auto_assign_quality ,
405
+ False , # Currently only supports continuous targets since we need to reconstruct its noise term.
406
+ )
407
+
379
408
shapley_vales = feature_relevance_sample (
380
- _get_icc_noise_function (
381
- causal_model , target_node , "exact" , noise_feature_samples , noise_feature_samples .columns , None , None , False
382
- ),
383
- feature_samples = noise_feature_samples .to_numpy (),
384
- baseline_samples = compute_noise_from_data (causal_model , baseline_samples )[
385
- noise_feature_samples .columns
386
- ].to_numpy (),
409
+ prediction_method ,
410
+ feature_samples = noise_feature_samples ,
411
+ baseline_samples = compute_noise_from_data (causal_model , baseline_samples )[node_names ].to_numpy (),
387
412
subset_scoring_func = subset_scoring_func ,
388
413
max_batch_size = max_batch_size ,
389
414
shapley_config = shapley_config ,
390
415
)
391
416
392
417
return [
393
- {(predecessor , target_node ): shapley_vales [i ][q ] for q , predecessor in enumerate (noise_feature_samples . columns )}
418
+ {(predecessor , target_node ): shapley_vales [i ][q ] for q , predecessor in enumerate (node_names )}
394
419
for i in range (shapley_vales .shape [0 ])
395
420
]
396
421
@@ -432,7 +457,7 @@ def icc_set_function(subset: np.ndarray) -> Union[np.ndarray, float]:
432
457
433
458
434
459
def _get_icc_noise_function (
435
- causal_model : InvertibleStructuralCausalModel ,
460
+ causal_model : StructuralCausalModel ,
436
461
target_node : Any ,
437
462
prediction_model : Union [PredictionModel , ClassificationModel , str ],
438
463
noise_samples : np .ndarray ,
0 commit comments