In [1]:
import os
import warnings
base_path = os.path.abspath("../")
os.chdir(base_path)
warnings.filterwarnings('ignore')
In [5]:
!pip install pydotplus
Successfully installed pydotplus-2.0.2
In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from causalml.inference.meta import BaseSRegressor, BaseTRegressor, BaseXRegressor, BaseRRegressor
from causalml.dataset.regression import synthetic_data
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import eli5
import shap

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # macOS workaround for duplicate OpenMP runtimes (needed for LightGBM)

%reload_ext autoreload
%autoreload 2
%matplotlib inline
In [7]:
plt.style.use('fivethirtyeight')
In [8]:
n_features = 25
n_samples = 10000
y, X, w, tau, b, e = synthetic_data(mode=1, n=n_samples, p=n_features, sigma=0.5)
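
The generator returns the observed outcome y, the covariates X, the binary treatment flag w, the true individual effect tau, the expected outcome b, and the propensity score e. Since tau is known for synthetic data, the true ATE gives a reference point for the estimates below (a sanity-check cell, not part of the original run):

In [ ]:
tau.mean()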
In [9]:
w_multi = np.array(['treatment_A' if x == 1 else 'control' for x in w])  # recode the 0/1 flag as group labels
e_multi = {'treatment_A': e}  # propensity scores keyed by treatment group
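
The meta-learners below use the multi-treatment API, so the treatment is expressed as group labels and the propensity scores are keyed by treatment group. A quick structural check (a hypothetical cell, not in the original run):

In [ ]:
np.unique(w_multi, return_counts=True)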

S Learner
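
The S-learner treats the treatment indicator as just another input feature: a single model $\hat{\mu}(x, w)$ is fit on all data, and the CATE is the difference between its predictions with the indicator toggled (standard meta-learner notation, added here for reference):

$$\hat{\tau}(x) = \hat{\mu}(x, 1) - \hat{\mu}(x, 0)$$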

In [10]:
base_algo = LGBMRegressor()
# base_algo = XGBRegressor()
# base_algo = RandomForestRegressor()
# base_algo = LinearRegression()

slearner = BaseSRegressor(base_algo, control_name='control')
slearner.estimate_ate(X, w_multi, y)
Out[10]:
array([0.55490032])
In [11]:
slearner_tau = slearner.fit_predict(X, w_multi, y)
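
fit_predict returns a CATE estimate for every observation. With synthetic data, the estimates can be compared against the known effects (a hedged sanity check using tau from the generator above):

In [ ]:
# Correlation between estimated and true individual treatment effects (sketch)
np.corrcoef(slearner_tau.flatten(), tau)[0, 1]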

Feature Importance (method = auto)

In [29]:
feature_names = ['stars', 'tiger', 'merciful', 'quixotic', 'fireman', 'dependent',
                 'shelf', 'touch', 'barbarous', 'clammy', 'playground', 'rain', 'offer',
                 'cute', 'future', 'damp', 'nonchalant', 'change', 'rigid', 'sweltering',
                 'eight', 'wrap', 'lethal', 'adhesive', 'lip']  # specify feature names

model_tau_feature = RandomForestRegressor()  # specify model for model_tau_feature

slearner.get_importance(X=X, tau=slearner_tau, model_tau_feature=model_tau_feature,
                        normalize=True, method='auto', features=feature_names)
Out[29]:
{'treatment_A': stars         0.473259
 tiger         0.355915
 quixotic      0.065642
 fireman       0.024865
 merciful      0.020751
 eight         0.003590
 wrap          0.003373
 nonchalant    0.003351
 sweltering    0.003208
 playground    0.003106
 rigid         0.003087
 adhesive      0.003073
 future        0.003064
 rain          0.003015
 barbarous     0.002970
 lip           0.002949
 touch         0.002940
 dependent     0.002864
 clammy        0.002768
 damp          0.002767
 shelf         0.002748
 lethal        0.002730
 change        0.002717
 cute          0.002676
 offer         0.002572
 dtype: float64}
In [28]:
slearner.plot_importance(X=X, tau=slearner_tau, normalize=True, method='auto')
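
With method='auto' and a model_tau_feature supplied, causalml in effect fits that model on (X, tau) and reports its built-in importances. A rough scikit-learn equivalent, under that assumption (a sketch, not causalml's exact code path):

In [ ]:
rf = RandomForestRegressor()
rf.fit(X, slearner_tau.flatten())  # predict the estimated CATE from the covariates
pd.Series(rf.feature_importances_, index=feature_names).sort_values(ascending=False).head()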

Feature Importance (method = permutation)

In [30]:
slearner.get_importance(X=X, tau=slearner_tau, method='permutation')
Out[30]:
{'treatment_A': Feature_000    1.023350
 Feature_001    0.796815
 Feature_003    0.178852
 Feature_004    0.053207
 Feature_002    0.046580
 Feature_021    0.001398
 Feature_014    0.000375
 Feature_012    0.000361
 Feature_023    0.000221
 Feature_010    0.000142
 Feature_022    0.000118
 Feature_024    0.000111
 Feature_016    0.000109
 Feature_005    0.000088
 Feature_007    0.000083
 Feature_006    0.000070
 Feature_015    0.000064
 Feature_017    0.000051
 Feature_020    0.000040
 Feature_011    0.000039
 Feature_018    0.000037
 Feature_019    0.000020
 Feature_013    0.000015
 Feature_008   -0.000012
 Feature_009   -0.000033
 dtype: float64}
In [31]:
slearner.plot_importance(X=X, tau=slearner_tau, method='permutation')
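
Permutation importance scores a feature by how much shuffling its column degrades the tau-model's predictions, which is why uninformative features can land near (or below) zero, as Feature_008 and Feature_009 do above. A comparable computation with scikit-learn (assumes sklearn >= 0.22 for sklearn.inspection; a sketch, not causalml's internal routine):

In [ ]:
from sklearn.inspection import permutation_importance

rf = RandomForestRegressor().fit(X, slearner_tau.flatten())
perm = permutation_importance(rf, X, slearner_tau.flatten(),
                              n_repeats=5, random_state=42)
pd.Series(perm.importances_mean, index=feature_names).sort_values(ascending=False).head()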

Shapley Values

In [32]:
shap_slearner = slearner.get_shap_values(X=X, tau=slearner_tau)
shap_slearner
Out[32]:
{'treatment_A': array([[ 1.67344196e-01,  1.01392731e-01, -1.02343975e-02, ...,
         -5.82282122e-04, -5.80429075e-04, -1.15121498e-03],
        [ 1.63331642e-01,  6.95368733e-02, -1.19225214e-02, ...,
         -4.72007164e-04, -1.99973955e-03,  1.75883504e-03],
        [ 2.46646508e-02,  1.32044714e-01,  5.81708982e-03, ...,
         -4.07797765e-04, -1.79385984e-04, -1.23061901e-03],
        ...,
        [-7.56204620e-02, -2.09544627e-01,  2.27119362e-03, ...,
         -5.30442302e-04, -7.31556583e-04, -3.30428708e-04],
        [-2.13276076e-01,  4.29964944e-02,  3.90619078e-03, ...,
         -2.75032717e-04,  6.39307999e-03, -2.64941113e-03],
        [ 2.00900667e-01,  1.08645986e-01,  5.93803891e-02, ...,
         -5.33713031e-04, -2.15799313e-04,  3.10745656e-03]])}
In [33]:
np.mean(np.abs(shap_slearner['treatment_A']), axis=0)
Out[33]:
array([0.15178745, 0.12639096, 0.01523069, 0.04146271, 0.01774858,
       0.00053926, 0.00031188, 0.00034881, 0.00045959, 0.00101085,
       0.0005198 , 0.0003664 , 0.00193809, 0.00019708, 0.00094902,
       0.00069249, 0.00059389, 0.00068306, 0.00046127, 0.00040038,
       0.00058764, 0.00362502, 0.00097755, 0.00129758, 0.00101108])
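
The same summary reads more easily with the feature names attached (a convenience cell, not in the original run):

In [ ]:
pd.Series(np.mean(np.abs(shap_slearner['treatment_A']), axis=0),
          index=feature_names).sort_values(ascending=False).head()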
In [34]:
# Plot shap values without specifying shap_dict
slearner.plot_shap_values(X=X, tau=slearner_tau)
In [35]:
# Plot shap values with a precomputed shap_dict
slearner.plot_shap_values(shap_dict=shap_slearner)
In [36]:
# interaction_idx set to None (no color coding for interaction effects)
slearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=1,
                              X=X,
                              tau=slearner_tau,
                              interaction_idx=None,
                              shap_dict=shap_slearner)
In [37]:
# interaction_idx set to 'auto' (searches for feature with greatest approximate interaction)
slearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=1,
                              X=X,
                              tau=slearner_tau,
                              interaction_idx='auto')
In [38]:
# interaction_idx set to 'auto' (searches for feature with greatest approximate interaction)
# specify feature names
slearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx='tiger',
                              X=X,
                              tau=slearner_tau,
                              interaction_idx='auto',
                              shap_dict=shap_slearner,
                              features=feature_names)
In [39]:
# interaction_idx set to specific index
slearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=1,
                              X=X,
                              tau=slearner_tau,
                              interaction_idx=10,
                              shap_dict=shap_slearner)

T Learner
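
The T-learner fits two separate outcome models, one on the control group and one on the treated group, and estimates the CATE as the difference of their predictions (standard notation, added for reference):

$$\hat{\tau}(x) = \hat{\mu}_1(x) - \hat{\mu}_0(x)$$

Unlike the S-learner call above, estimate_ate here returns the point estimate together with its lower and upper confidence bounds.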

In [40]:
tlearner = BaseTRegressor(LGBMRegressor(), control_name='control')
tlearner.estimate_ate(X, w_multi, y)
Out[40]:
(array([0.55057463]), array([0.53564934]), array([0.56549991]))
In [41]:
tlearner_tau = tlearner.fit_predict(X, w_multi, y)

Feature Importance (method = auto)

In [43]:
tlearner.get_importance(X=X, tau=tlearner_tau, normalize=True, method='auto')
Out[43]:
{'treatment_A': Feature_000    0.362333
 Feature_001    0.299191
 Feature_003    0.059810
 Feature_002    0.036877
 Feature_004    0.023586
 Feature_021    0.021891
 Feature_023    0.015426
 Feature_006    0.015038
 Feature_012    0.014468
 Feature_018    0.013687
 Feature_020    0.013223
 Feature_007    0.012726
 Feature_019    0.012679
 Feature_022    0.011005
 Feature_017    0.010635
 Feature_014    0.009610
 Feature_013    0.009099
 Feature_024    0.009047
 Feature_016    0.007761
 Feature_015    0.007745
 Feature_005    0.007433
 Feature_010    0.007382
 Feature_008    0.007164
 Feature_011    0.006978
 Feature_009    0.005206
 dtype: float64}
In [44]:
tlearner.plot_importance(X=X, tau=tlearner_tau, normalize=True, method='auto')

Feature Importance (method = permutation)

In [46]:
tlearner.get_importance(X=X, tau=tlearner_tau, method='permutation')
Out[46]:
{'treatment_A': Feature_000    0.584451
 Feature_001    0.471164
 Feature_003    0.063217
 Feature_002    0.028658
 Feature_004    0.020477
 Feature_021    0.017301
 Feature_023    0.009117
 Feature_006    0.008424
 Feature_012    0.008003
 Feature_018    0.007441
 Feature_007    0.007398
 Feature_020    0.007170
 Feature_019    0.006252
 Feature_022    0.005802
 Feature_014    0.004788
 Feature_017    0.004465
 Feature_024    0.004456
 Feature_005    0.004155
 Feature_013    0.003868
 Feature_008    0.003045
 Feature_015    0.002910
 Feature_010    0.002492
 Feature_011    0.002238
 Feature_016    0.002034
 Feature_009    0.001245
 dtype: float64}
In [47]:
tlearner.plot_importance(X=X, tau=tlearner_tau, method='permutation')

Shapley Values

In [48]:
shap_tlearner = tlearner.get_shap_values(X=X, tau=tlearner_tau)
shap_tlearner
Out[48]:
{'treatment_A': array([[ 0.15774151,  0.06697421, -0.03786978, ..., -0.02137831,
         -0.03537149, -0.0124503 ],
        [ 0.20747103,  0.07536986,  0.01902383, ...,  0.00255734,
         -0.00838789,  0.0299999 ],
        [ 0.01407802,  0.11898248,  0.01170114, ..., -0.00821638,
          0.03396484,  0.00448613],
        ...,
        [-0.12957047, -0.23413225,  0.00413373, ..., -0.00735772,
         -0.02484702, -0.00556529],
        [-0.31320172,  0.01283734,  0.02099349, ..., -0.00629242,
          0.0207837 , -0.00708764],
        [ 0.22571325,  0.09594434,  0.08014993, ..., -0.00977149,
          0.01672412,  0.02805249]])}
In [49]:
# Plot shap values without specifying shap_dict
tlearner.plot_shap_values(X=X, tau=tlearner_tau)
In [50]:
# Plot shap values with a precomputed shap_dict
tlearner.plot_shap_values(shap_dict=shap_tlearner)
In [51]:
# interaction_idx set to None (no color coding for interaction effects)
tlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=3,
                              X=X,
                              tau=tlearner_tau,
                              interaction_idx=None,
                              shap_dict=shap_tlearner)
In [52]:
# interaction_idx set to 'auto' (searches for feature with greatest approximate interaction)
tlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=3,
                              X=X,
                              tau=tlearner_tau,
                              interaction_idx='auto',
                              shap_dict=shap_tlearner)
In [53]:
# interaction_idx set to specific index
tlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=3,
                              X=X,
                              tau=tlearner_tau,
                              interaction_idx=10,
                              shap_dict=shap_tlearner)

X Learner
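
The X-learner builds on the T-learner's outcome models: it imputes an individual treatment effect for each unit, fits CATE models on the imputed effects in each group, and blends them with the propensity score g(x), which is why estimate_ate and predict take e_multi as an extra argument (standard notation from Künzel et al., added for reference):

$$\hat{\tau}(x) = g(x)\,\hat{\tau}_0(x) + \big(1 - g(x)\big)\,\hat{\tau}_1(x)$$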

In [54]:
xlearner = BaseXRegressor(LGBMRegressor(), control_name='control')
xlearner.estimate_ate(X, e_multi, w_multi, y)
Out[54]:
(array([0.49756847]), array([0.48351489]), array([0.51162205]))
In [55]:
xlearner_tau = xlearner.predict(X, e_multi, w_multi, y)

Feature Importance (method = auto)

In [56]:
xlearner.get_importance(X=X, tau=xlearner_tau, normalize=True, method='auto')
Out[56]:
{'treatment_A': Feature_000    0.412602
 Feature_001    0.371857
 Feature_002    0.022709
 Feature_003    0.022347
 Feature_021    0.017301
 Feature_007    0.016803
 Feature_012    0.012952
 Feature_006    0.011147
 Feature_022    0.009847
 Feature_004    0.009577
 Feature_010    0.009058
 Feature_023    0.008973
 Feature_018    0.008747
 Feature_020    0.008271
 Feature_013    0.007478
 Feature_024    0.006807
 Feature_019    0.006313
 Feature_008    0.005679
 Feature_017    0.005359
 Feature_011    0.004929
 Feature_015    0.004841
 Feature_005    0.004792
 Feature_016    0.004664
 Feature_014    0.004223
 Feature_009    0.002724
 dtype: float64}
In [57]:
xlearner.plot_importance(X=X, tau=xlearner_tau, normalize=True, method='auto')

Feature Importance (method = permutation)

In [58]:
xlearner.get_importance(X=X, tau=xlearner_tau, method='permutation')
Out[58]:
{'treatment_A': Feature_000    0.805899
 Feature_001    0.666593
 Feature_002    0.031865
 Feature_003    0.026678
 Feature_007    0.023633
 Feature_021    0.021480
 Feature_012    0.016873
 Feature_006    0.011455
 Feature_022    0.009709
 Feature_018    0.008483
 Feature_010    0.007861
 Feature_023    0.007768
 Feature_020    0.007495
 Feature_004    0.007405
 Feature_019    0.006410
 Feature_024    0.005813
 Feature_013    0.005790
 Feature_017    0.005267
 Feature_008    0.004484
 Feature_005    0.003941
 Feature_015    0.003777
 Feature_016    0.002925
 Feature_014    0.002818
 Feature_011    0.002594
 Feature_009    0.001561
 dtype: float64}
In [59]:
xlearner.plot_importance(X=X, tau=xlearner_tau, method='permutation')

Shapley Values

In [60]:
shap_xlearner = xlearner.get_shap_values(X=X, tau=xlearner_tau)
shap_xlearner
Out[60]:
{'treatment_A': array([[ 0.19731437,  0.08685437, -0.02151614, ..., -0.00902779,
         -0.02825   , -0.009991  ],
        [ 0.20786704,  0.05807552,  0.02494219, ...,  0.00182426,
          0.00068328,  0.01226228],
        [-0.00410755,  0.08706992,  0.01258358, ..., -0.00937363,
          0.02021668,  0.0023147 ],
        ...,
        [-0.1137614 , -0.16566528,  0.00774009, ..., -0.00468378,
         -0.01635273,  0.0004275 ],
        [-0.32880756, -0.0076652 ,  0.00939252, ..., -0.00668894,
          0.01355206, -0.00397982],
        [ 0.2185561 ,  0.08927071,  0.05098435, ...,  0.02362597,
          0.01393286,  0.0142966 ]])}
In [61]:
# shap_dict not specified
xlearner.plot_shap_values(X=X, tau=xlearner_tau)
In [62]:
# shap_dict specified
xlearner.plot_shap_values(shap_dict=shap_xlearner)
In [63]:
# interaction_idx set to None (no color coding for interaction effects)
xlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=0,
                              X=X,
                              tau=xlearner_tau,
                              interaction_idx=None,
                              shap_dict=shap_xlearner)
In [64]:
# interaction_idx set to 'auto' (searches for feature with greatest approximate interaction)
xlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=0,
                              X=X,
                              tau=xlearner_tau,
                              interaction_idx='auto',
                              shap_dict=shap_xlearner)
In [65]:
# interaction_idx set to specific index
xlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=0,
                              X=X,
                              tau=xlearner_tau,
                              interaction_idx=10,
                              shap_dict=shap_xlearner)

R Learner
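
The R-learner residualizes both the outcome and the treatment on the covariates and regresses residual on residual, minimizing the R-loss of Nie and Wager (standard notation, added for reference; $\hat{m}$ is the outcome model and $\hat{e}$ the propensity model):

$$\hat{\tau} = \arg\min_{\tau} \sum_i \Big[ \big(y_i - \hat{m}(x_i)\big) - \big(w_i - \hat{e}(x_i)\big)\,\tau(x_i) \Big]^2$$

Here importance_type='gain' makes the LightGBM base learner report split-gain rather than split-count importances, which feeds the method='auto' ranking below.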

In [102]:
rlearner = BaseRRegressor(LGBMRegressor(importance_type='gain'), control_name='control')
rlearner_tau = rlearner.fit_predict(X, e_multi, w_multi, y)

Feature Importance (method = auto)

In [104]:
rlearner.get_importance(X=X, tau=rlearner_tau, normalize=True, method='auto')
Out[104]:
{'treatment_A': Feature_001    0.067064
 Feature_000    0.061068
 Feature_013    0.049578
 Feature_012    0.049523
 Feature_021    0.045252
 Feature_014    0.041098
 Feature_006    0.040958
 Feature_007    0.040852
 Feature_010    0.039463
 Feature_008    0.038707
 Feature_005    0.038323
 Feature_019    0.037470
 Feature_003    0.037467
 Feature_024    0.037409
 Feature_017    0.036745
 Feature_015    0.036739
 Feature_022    0.036329
 Feature_009    0.035544
 Feature_004    0.034709
 Feature_020    0.034173
 Feature_002    0.033933
 Feature_023    0.032531
 Feature_018    0.032120
 Feature_016    0.031664
 Feature_011    0.031284
 dtype: float64}
In [105]:
rlearner.plot_importance(X=X, tau=rlearner_tau, method='auto')

Feature Importance (method = permutation)

In [106]:
rlearner.get_importance(X=X, tau=rlearner_tau, method='permutation')
Out[106]:
{'treatment_A': Feature_000    0.448229
 Feature_001    0.402580
 Feature_012    0.102679
 Feature_018    0.099743
 Feature_007    0.095888
 Feature_005    0.094239
 Feature_022    0.088104
 Feature_006    0.086349
 Feature_004    0.085386
 Feature_021    0.083725
 Feature_003    0.083208
 Feature_008    0.080986
 Feature_009    0.080418
 Feature_013    0.075372
 Feature_014    0.069872
 Feature_024    0.068953
 Feature_020    0.057205
 Feature_002    0.056940
 Feature_017    0.054044
 Feature_011    0.050657
 Feature_010    0.049588
 Feature_015    0.047756
 Feature_023    0.047332
 Feature_019    0.042356
 Feature_016    0.033156
 dtype: float64}
In [107]:
rlearner.plot_importance(X=X, tau=rlearner_tau, method='permutation')

Shapley Values

In [108]:
shap_rlearner = rlearner.get_shap_values(X=X, tau=rlearner_tau)
shap_rlearner
Out[108]:
{'treatment_A': array([[ 0.12166296,  0.05569298, -0.01227441, ..., -0.034672  ,
         -0.02676642, -0.01154799],
        [ 0.21306487,  0.05847596,  0.02468065, ..., -0.00987502,
          0.03105622,  0.01275596],
        [-0.00648983,  0.06220755,  0.01042609, ..., -0.02608381,
          0.00226248,  0.00378111],
        ...,
        [-0.1325076 , -0.16974479,  0.00444393, ..., -0.00296434,
         -0.02358771, -0.00798097],
        [-0.37403068, -0.02412857,  0.00099695, ...,  0.00656961,
          0.01955391,  0.00396616],
        [ 0.17123622,  0.08777265,  0.22690693, ..., -0.02559896,
          0.03512776,  0.01017954]])}
In [109]:
# without providing shap_dict
rlearner.plot_shap_values(X=X, tau=rlearner_tau)
In [110]:
# with shap_dict provided
rlearner.plot_shap_values(shap_dict=shap_rlearner)
In [111]:
# interaction_idx set to None (no color coding for interaction effects)
rlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=0,
                              X=X,
                              tau=rlearner_tau,
                              interaction_idx=None,
                              shap_dict=shap_rlearner)
In [112]:
# interaction_idx set to 'auto' (searches for feature with greatest approximate interaction)
rlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=0,
                              X=X,
                              tau=rlearner_tau,
                              interaction_idx='auto',
                              shap_dict=shap_rlearner)
In [113]:
# interaction_idx set to specific index
rlearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx=1,
                              X=X,
                              tau=rlearner_tau,
                              interaction_idx=20,
                              shap_dict=shap_rlearner)