import os
import warnings
base_path = os.path.abspath("../")
os.chdir(base_path)
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from causalml.inference.meta import BaseSRegressor, BaseTRegressor, BaseXRegressor, BaseRRegressor
from causalml.dataset.regression import synthetic_data
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import eli5
import shap
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' # for lightgbm to work
%reload_ext autoreload
%autoreload 2
%matplotlib inline
plt.style.use('fivethirtyeight')
n_features = 25
n_samples = 10000
y, X, w, tau, b, e = synthetic_data(mode=1, n=n_samples, p=n_features, sigma=0.5)
w_multi = np.array(['treatment_A' if x==1 else 'control' for x in w])
e_multi = {'treatment_A': e}
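# Optional sanity check on the simulated data (a hedged sketch, not part of the
# original walkthrough): synthetic_data returns the outcome y, features X,
# treatment flag w, true CATE tau, baseline b, and propensity score e.
print(X.shape, y.shape, w.shape)
print('True ATE:', tau.mean())
print('Treated fraction:', w.mean())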
base_algo = LGBMRegressor()
# base_algo = XGBRegressor()
# base_algo = RandomForestRegressor()
# base_algo = LinearRegression()
slearner = BaseSRegressor(base_algo, control_name='control')
slearner.estimate_ate(X, w_multi, y)
slearner_tau = slearner.fit_predict(X, w_multi, y)
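# estimate_ate() above returns a point estimate with confidence bounds; a hedged
# way to capture them and compare against the simulated ground truth (assumes
# the (ate, lower, upper) return tuple):
ate, ate_lb, ate_ub = slearner.estimate_ate(X, w_multi, y)
print('Estimated ATE: {:.3f} ({:.3f}, {:.3f})'.format(ate[0], ate_lb[0], ate_ub[0]))
print('True ATE:      {:.3f}'.format(tau.mean()))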
# Feature Importance (method = gini)
model_tau_feature = RandomForestRegressor()  # model used to fit the CATE for feature importance
feature_names = ['stars', 'tiger', 'merciful', 'quixotic', 'fireman', 'dependent',
'shelf', 'touch', 'barbarous', 'clammy', 'playground', 'rain', 'offer',
'cute', 'future', 'damp', 'nonchalant', 'change', 'rigid', 'sweltering',
'eight', 'wrap', 'lethal', 'adhesive', 'lip'] # specify feature names
slearner.get_importance(X=X, tau=slearner_tau, model_tau_feature=model_tau_feature,
normalize=True, method='gini', features=feature_names)
slearner.plot_importance(X=X, tau=slearner_tau, normalize=True, method='gini')
# Feature Importance (method = permutation)
slearner.get_importance(X=X, tau=slearner_tau, method='permutation')
slearner.plot_importance(X=X, tau=slearner_tau, method='permutation')
shap_slearner = slearner.get_shap_values(X=X, tau=slearner_tau)
shap_slearner
np.mean(np.abs(shap_slearner['treatment_A']),axis=0)
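# The mean absolute SHAP value per feature is a simple global importance score.
# A hedged sketch ranking the named features by that score (assumes the SHAP
# matrix columns follow the same order as feature_names):
shap_importance = pd.Series(np.mean(np.abs(shap_slearner['treatment_A']), axis=0),
                            index=feature_names).sort_values(ascending=False)
shap_importance.head(10)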
# Plot shap values without specifying shap_dict
slearner.plot_shap_values(X=X, tau=slearner_tau)
# Plot shap values WITH specifying shap_dict
slearner.plot_shap_values(shap_dict=shap_slearner)
# interaction_idx set to None (no color coding for interaction effects)
slearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=1,
X=X,
tau=slearner_tau,
interaction_idx=None,
shap_dict=shap_slearner)
# interaction_idx set to 'auto' (searches for feature with greatest approximate interaction)
slearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=1,
X=X,
tau=slearner_tau,
interaction_idx='auto')
# interaction_idx set to 'auto' (searches for feature with greatest approximate interaction)
# specify feature names
slearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx='tiger',
X=X,
tau=slearner_tau,
interaction_idx='auto',
shap_dict=shap_slearner,
features=feature_names)
# interaction_idx set to specific index
slearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=1,
X=X,
tau=slearner_tau,
interaction_idx=10,
shap_dict=shap_slearner)
tlearner = BaseTRegressor(LGBMRegressor(), control_name='control')
tlearner.estimate_ate(X, w_multi, y)
tlearner_tau = tlearner.fit_predict(X, w_multi, y)
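# Hedged comparison of the S- and T-learner CATE estimates (a sketch; both
# arrays have a single column because there is only one treatment group):
np.corrcoef(slearner_tau.flatten(), tlearner_tau.flatten())[0, 1]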
# Feature Importance (method = gini)
tlearner.get_importance(X=X, tau=tlearner_tau, normalize=True, method='gini')
tlearner.plot_importance(X=X, tau=tlearner_tau, normalize=True, method='gini')
# Feature Importance (method = permutation)
tlearner.get_importance(X=X, tau=tlearner_tau, method='permutation')
tlearner.plot_importance(X=X, tau=tlearner_tau, method='permutation')
shap_tlearner = tlearner.get_shap_values(X=X, tau=tlearner_tau)
shap_tlearner
# Plot shap values without specifying shap_dict
tlearner.plot_shap_values(X=X, tau=tlearner_tau)
# Plot shap values WITH specifying shap_dict
tlearner.plot_shap_values(shap_dict=shap_tlearner)
# interaction_idx set to None (no color coding for interaction effects)
tlearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=3,
X=X,
tau=tlearner_tau,
interaction_idx=None,
shap_dict=shap_tlearner)
# interaction_idx set to 'auto' (searches for feature with greatest approximate interaction)
tlearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=3,
X=X,
tau=tlearner_tau,
interaction_idx='auto',
shap_dict=shap_tlearner)
# interaction_idx set to specific index
tlearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=3,
X=X,
tau=tlearner_tau,
interaction_idx=10,
shap_dict=shap_tlearner)
xlearner = BaseXRegressor(LGBMRegressor(), control_name='control')
xlearner.estimate_ate(X, e_multi, w_multi, y)
xlearner_tau = xlearner.predict(X, e_multi, w_multi, y)
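# Hedged check of the X-learner CATE against the simulated ground truth
# (a sketch; tau is the true CATE returned by synthetic_data):
print('X-learner CATE MSE vs true tau:',
      np.mean((xlearner_tau.flatten() - tau) ** 2))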
# Feature Importance (method = gini)
xlearner.get_importance(X=X, tau=xlearner_tau, normalize=True, method='gini')
xlearner.plot_importance(X=X, tau=xlearner_tau, normalize=True, method='gini')
# Feature Importance (method = permutation)
xlearner.get_importance(X=X, tau=xlearner_tau, method='permutation')
xlearner.plot_importance(X=X, tau=xlearner_tau, method='permutation')
shap_xlearner = xlearner.get_shap_values(X=X, tau=xlearner_tau)
shap_xlearner
# shap_dict not specified
xlearner.plot_shap_values(X=X, tau=xlearner_tau)
# shap_dict specified
xlearner.plot_shap_values(shap_dict=shap_xlearner)
# interaction_idx set to None (no color coding for interaction effects)
xlearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=0,
X=X,
tau=xlearner_tau,
interaction_idx=None,
shap_dict=shap_xlearner)
# interaction_idx set to 'auto' (searches for feature with greatest approximate interaction)
xlearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=0,
X=X,
tau=xlearner_tau,
interaction_idx='auto',
shap_dict=shap_xlearner)
# interaction_idx set to specific index
xlearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=0,
X=X,
tau=xlearner_tau,
interaction_idx=10,
shap_dict=shap_xlearner)
rlearner = BaseRRegressor(LGBMRegressor(), control_name='control')
rlearner.estimate_ate(X, e_multi, w_multi, y)
rlearner_tau = rlearner.fit_predict(X, e_multi, w_multi, y)
# Feature Importance (method = gini)
rlearner.get_importance(X=X, tau=rlearner_tau, normalize=True, method='gini')
rlearner.plot_importance(X=X, tau=rlearner_tau, normalize=True, method='gini')
# Feature Importance (method = permutation)
rlearner.get_importance(X=X, tau=rlearner_tau, method='permutation')
rlearner.plot_importance(X=X, tau=rlearner_tau, method='permutation')
shap_rlearner = rlearner.get_shap_values(X=X, tau=rlearner_tau)
shap_rlearner
# without providing shap_dict
rlearner.plot_shap_values(X=X, tau=rlearner_tau)
# with providing shap_dict
rlearner.plot_shap_values(shap_dict=shap_rlearner)
# interaction_idx set to None (no color coding for interaction effects)
rlearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=0,
X=X,
tau=rlearner_tau,
interaction_idx=None,
shap_dict=shap_rlearner)
# interaction_idx set to 'auto' (searches for feature with greatest approximate interaction)
rlearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=0,
X=X,
tau=rlearner_tau,
interaction_idx='auto',
shap_dict=shap_rlearner)
# interaction_idx set to specific index
rlearner.plot_shap_dependence(treatment_group='treatment_A',
feature_idx=1,
X=X,
tau=rlearner_tau,
interaction_idx=20,
shap_dict=shap_rlearner)
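# Optional wrap-up (a hedged sketch, not part of the original walkthrough):
# put the four meta-learners' CATE estimates next to the true tau for a quick
# side-by-side summary.
df_cate = pd.DataFrame({'S': slearner_tau.flatten(),
                        'T': tlearner_tau.flatten(),
                        'X': xlearner_tau.flatten(),
                        'R': rlearner_tau.flatten(),
                        'true_tau': tau})
df_cate.describe()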