iPFI and iSAGE with categorical inputs #85

rjagtani · 2023-09-15T15:02:10Z

iPFI and iSAGE attribute all importance to categorical variables and return feature-importance (FI) scores of approximately 0 for all numerical variables. The issue can be reproduced with the code below.

# Load imports
from river import metrics
from river.utils import Rolling
from river.ensemble import AdaptiveRandomForestRegressor
from river.datasets import Bikes
from river import preprocessing
from river import compose
from ixai.explainer import IncrementalPFI, IncrementalSage, IncrementalPDP
from ixai.utils.wrappers import RiverWrapper
from ixai.storage import GeometricReservoirStorage
from ixai.imputer import MarginalImputer
#%%
# Configuration shared by the rest of the script
RANDOM_SEED = 42
#%%
# Data stream that is later used for prediction, explanation and learning
stream = Bikes()
#%%
# Print the first (features, target) pair of the stream, then stop
for x, y in stream:
    print(x)
    print(y)
    break
#%%
# Feature lists: `feature_names` is what gets passed on to the model and
# to the explainers (all numeric features plus 'description')
cat_vars = ['station', 'description']
num_vars = ['clouds', 'humidity', 'pressure', 'temperature', 'wind']
feature_names = [*num_vars, 'description']
# Alternative left by the author: numeric features only
#feature_names = num_vars
#%%
# Model and training setup
# NOTE(review): the chain below starts with compose.Select('description').
# Presumably Select forwards only the listed keys, in which case the numeric
# features in `num_vars` never reach the regressor and cannot receive any
# importance from iPFI / iSAGE — TODO confirm against the river `compose`
# documentation. If the intent is "one-hot encode 'description' and pass the
# numeric features through unchanged", the usual river idiom would be a union
# placed before the regressor, e.g.
#     compose.Select(*num_vars)
#     + (compose.Select('description') | preprocessing.OneHotEncoder())
# The code is intentionally left as reported so that it still reproduces the
# behaviour described in this issue.
model = compose.Pipeline(
    compose.Select('description') | preprocessing.OneHotEncoder()
    | AdaptiveRandomForestRegressor(seed=RANDOM_SEED)
    )

# Alternative left by the author: the bare forest without any preprocessing
#model = AdaptiveRandomForestRegressor(seed=RANDOM_SEED)
#%%
# Use River Wrapper around model function to standardize model outputs, Initialize loss and training metric depending on ML task
# `model_function` wraps the pipeline's predict_one and is what the imputer
# and the explainers call; `loss_metric` is handed to the explainers, while
# `training_metric` tracks MAE over a rolling window of 1000 updates.
model_function = RiverWrapper(model.predict_one)
loss_metric = metrics.MAE()
training_metric = Rolling(metrics.MAE(), window_size=1000)
#%%
# Storage object shared by the imputer and by all three explainers below
storage = GeometricReservoirStorage(size=500, store_targets=False)

# Imputer configured with the wrapped model, the shared storage and the
# "joint" sampling strategy
imputer = MarginalImputer(
    storage_object=storage,
    sampling_strategy="joint",
    model_function=model_function,
)
#%%
# iPFI and iSAGE are built from exactly the same keyword arguments, so the
# arguments are collected once and unpacked into both constructors
explainer_kwargs = dict(
    model_function=model_function,
    loss_function=loss_metric,
    imputer=imputer,
    storage=storage,
    feature_names=feature_names,
    smoothing_alpha=0.01,
    n_inner_samples=4,
)

# Incremental PFI explainer
incremental_pfi = IncrementalPFI(**explainer_kwargs)
#%%
# Incremental SAGE explainer
incremental_sage = IncrementalSage(**explainer_kwargs)
#%%
# Incremental PDP explainer for the single feature 'humidity'
incremental_pdp = IncrementalPDP(
    model_function=model_function,
    pdp_feature='humidity',
    gridsize=8,
    smoothing_alpha=0.01,
    dynamic_setting=True,
    is_classification=False,
    storage=storage,
    storage_size=100,
)
#%%
# Walk over the stream; for every instance: predict, score, explain, learn.
# Progress is printed every 250 instances and the run stops after 1000.
for n, (x_i, y_i) in enumerate(stream, start=1):
    # Restrict the observation to the features listed in `feature_names`
    x_i = {name: x_i[name] for name in feature_names}

    # Score the prediction made *before* the model has learned this instance
    y_i_pred = model.predict_one(x_i)
    training_metric.update(y_true=y_i, y_pred=y_i_pred)

    # Explaining: PFI and PDP are called with update_storage=False, the SAGE
    # call does not pass that flag. SAGE runs first, as in the original order.
    inc_sage = incremental_sage.explain_one(x_i, y_i)
    inc_fi_pfi = incremental_pfi.explain_one(x_i, y_i, update_storage=False)
    inc_pdp = incremental_pdp.explain_one(x_i, update_storage=False)

    # Learning happens only after the instance has been explained
    model.learn_one(x_i, y_i)

    if n % 250 == 0:
        report = (
            f"{n}: perf {training_metric.get()}\n"
            f"{n}: sage  {incremental_sage.importance_values}\n"
            f"{n}: pfi  {incremental_pfi.importance_values}\n"
        )
        print(report)

    # Keep iterating until 1000 instances have been processed
    if n < 1000:
        continue

    incremental_pdp.plot_pdp()
    break

The text was updated successfully, but these errors were encountered:

mmschlk · 2023-09-15T15:51:23Z

Thank you!

mmschlk · 2023-09-20T13:35:55Z

I am pretty sure this comes from the pipeline object...

rjagtani mentioned this issue Sep 15, 2023

Handle unseen labels in multiclass classification for PDP #86

Open

mmschlk self-assigned this Sep 15, 2023

mmschlk added the bug Something isn't working label Sep 15, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

iPFI and iSAGE with categorical inputs #85

iPFI and iSAGE with categorical inputs #85

rjagtani commented Sep 15, 2023

mmschlk commented Sep 15, 2023

mmschlk commented Sep 20, 2023

iPFI and iSAGE with categorical inputs #85

iPFI and iSAGE with categorical inputs #85

Comments

rjagtani commented Sep 15, 2023

mmschlk commented Sep 15, 2023

mmschlk commented Sep 20, 2023