自定义交互
在本笔记本中，我们将禁用 interpret API 内置的自动交互检测功能，改为自行检测交互项，然后将它们合并到 EBM 中。我们还将检测并使用三向交互；这类交互通常并不需要，但有时可能有用。
本笔记本可以在我们 GitHub 仓库的 examples 文件夹中找到。
# install interpret if not already installed
try:
import interpret
except ModuleNotFoundError:
!pip install --quiet interpret pandas scikit-learn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from interpret.glassbox import ExplainableBoostingClassifier
from interpret import set_visualize_provider
from interpret.provider import InlineProvider
# Use the inline provider for interpret's visualizations.
set_visualize_provider(InlineProvider())

# The file is read with header=None, so the column names are assigned by hand.
adult_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
column_names = [
    "Age", "WorkClass", "fnlwgt", "Education", "EducationNum",
    "MaritalStatus", "Occupation", "Relationship", "Race", "Gender",
    "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry", "Income",
]
df = pd.read_csv(adult_url, header=None)
df.columns = column_names

# Every column except the last is a feature; the last column ("Income") is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# One seed drives NumPy's global RNG, the train/test split and the models below.
seed = 42
np.random.seed(seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=seed,
)
构建一个仅含主效应的模型
# Mains-only EBM: interactions=0 requests no interaction terms, so the
# interactions can be detected and added by hand afterwards.
ebm1 = ExplainableBoostingClassifier(
    interactions=0,
    random_state=seed,
)
ebm1.fit(X_train, y_train)
ExplainableBoostingClassifier(interactions=0)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
ExplainableBoostingClassifier(interactions=0)
确定配对
from itertools import combinations

from interpret.utils import measure_interactions

# Measure every possible pair of feature indices, passing the mains-only
# model ebm1 as the initial score.
n_features = X_train.shape[1]
candidate_pairs = combinations(range(n_features), 2)
ranked_pairs = measure_interactions(
    X_train,
    y_train,
    interactions=candidate_pairs,
    init_score=ebm1,
)

# Each result is an (interaction, strength) tuple; keep only the interaction
# of the first 10 entries (the top 10 pairs).
pairs = [interaction for interaction, _ in ranked_pairs[:10]]
构建一个仅含配对项的模型
# Pairs-only EBM: the mains are excluded and only the selected pairs are
# fitted, starting from ebm1 as the initial score.
ebm2 = ExplainableBoostingClassifier(
    interactions=pairs,
    exclude="mains",
    random_state=seed,
)
ebm2.fit(X_train, y_train, init_score=ebm1)

# Patch ebm2 so that no feature is left with an empty bin definition:
# wherever ebm2's entry is empty, take ebm1's entry for that feature instead.
patched_bins = []
for mains_bins, pair_bins in zip(ebm1.bins_, ebm2.bins_):
    patched_bins.append(mains_bins if len(pair_bins) == 0 else pair_bins)
ebm2.bins_ = patched_bins
将主效应和配对项合并为单一模型
from interpret.glassbox import merge_ebms

pair_stage_models = [ebm1, ebm2]
n_pair_stage_models = len(pair_stage_models)
ebm_pairs = merge_ebms(pair_stage_models)

# The two EBMs share no terms, so merge_ebms treats each term as having zero
# scores in the model that lacks it. Multiplying every term's scores by the
# number of merged models (2.0) undoes that. The bin_weights_ are reduced by
# the same factor because both models were built on the same underlying
# features.
n_pair_stage_terms = len(ebm_pairs.term_features_)
for term_idx in range(n_pair_stage_terms):
    ebm_pairs.scale(term_idx, float(n_pair_stage_models))
    ebm_pairs.bin_weights_[term_idx] *= 1.0 / n_pair_stage_models

# The models are being stacked rather than averaged, so the intercepts add up.
ebm_pairs.intercept_ = ebm1.intercept_ + ebm2.intercept_

# Clear the bagged attributes and the standard deviations on the merged model.
ebm_pairs.bagged_intercept_ = None
ebm_pairs.bagged_scores_ = None
ebm_pairs.standard_deviations_ = None
确定三元组
# Measure every possible triple of feature indices, passing the merged
# mains+pairs model ebm_pairs as the initial score.
candidate_triples = combinations(range(n_features), 3)
ranked_triples = measure_interactions(
    X_train,
    y_train,
    interactions=candidate_triples,
    init_score=ebm_pairs,
)

# Each result is an (interaction, strength) tuple; keep only the interaction
# of the first 10 entries (the top 10 triples).
triples = [interaction for interaction, _ in ranked_triples[:10]]
构建一个仅含三元组的 EBM
# Triples-only EBM: the mains are excluded and only the selected triples are
# fitted, starting from ebm_pairs as the initial score.
ebm3 = ExplainableBoostingClassifier(
    interactions=triples,
    exclude="mains",
    random_state=seed,
)
ebm3.fit(X_train, y_train, init_score=ebm_pairs)

# Patch ebm3 so that no feature is left with an empty bin definition:
# wherever ebm3's entry is empty, take ebm1's entry for that feature instead.
patched_bins = []
for mains_bins, triple_bins in zip(ebm1.bins_, ebm3.bins_):
    patched_bins.append(mains_bins if len(triple_bins) == 0 else triple_bins)
ebm3.bins_ = patched_bins
/opt/hostedtoolcache/Python/3.9.20/x64/lib/python3.9/site-packages/interpret/glassbox/_ebm/_ebm.py:1335: UserWarning: Interactions with 3 or more terms are not graphed in global explanations. Local explanations are still available and exact.
warn(
将主效应、配对和三元组合并为单一模型
triple_stage_models = [ebm1, ebm2, ebm3]
n_triple_stage_models = len(triple_stage_models)
ebm_triples = merge_ebms(triple_stage_models)

# The three EBMs share no terms, so merge_ebms treats each term as having zero
# scores in the models that lack it. Multiplying every term's scores by the
# number of merged models (3.0) undoes that. The bin_weights_ are reduced by
# the same factor because all models were built on the same underlying
# features.
n_triple_stage_terms = len(ebm_triples.term_features_)
for term_idx in range(n_triple_stage_terms):
    ebm_triples.scale(term_idx, float(n_triple_stage_models))
    ebm_triples.bin_weights_[term_idx] *= 1.0 / n_triple_stage_models

# The models are being stacked rather than averaged, so the intercepts add up.
ebm_triples.intercept_ = ebm1.intercept_ + ebm2.intercept_ + ebm3.intercept_

# Clear the bagged attributes and the standard deviations on the merged model.
ebm_triples.bagged_intercept_ = None
ebm_triples.bagged_scores_ = None
ebm_triples.standard_deviations_ = None
评估EBMs
from sklearn.metrics import log_loss


def _print_test_log_loss(model):
    """Print and return the log loss of ``model``'s predicted probabilities on the test split."""
    loss = log_loss(y_test, model.predict_proba(X_test))
    print(loss)
    return loss


# Mains-only model, then the hand-built mains+pairs model.
loss1 = _print_test_log_loss(ebm1)
loss2 = _print_test_log_loss(ebm_pairs)

# Baseline for the custom pair EBM: an EBM that auto-discovers 10 pairs itself.
ebm_default = ExplainableBoostingClassifier(random_state=seed, interactions=10)
ebm_default.fit(X_train, y_train)
loss2_default = _print_test_log_loss(ebm_default)

# Hand-built mains+pairs+triples model.
loss3 = _print_test_log_loss(ebm_triples)
0.27291582649099
0.27239237404216576
0.2723551286416435
0.272391223708688