Two-moons Classification: Improved Uncertainty Quantification¶
In this notebook we show how to fix a model's overconfidence on inputs far away from the training data. We will do this using two different approaches; let's dive right in!
Setup¶
Download the two-moons data from scikit-learn¶
First, let us download the two-moons data from scikit-learn.
[ ]:
TRAIN_DATA_SIZE = 500
from sklearn.datasets import make_moons
train_data = make_moons(n_samples=TRAIN_DATA_SIZE, noise=0.1, random_state=0)
val_data = make_moons(n_samples=500, noise=0.1, random_state=1)
test_data = make_moons(n_samples=500, noise=0.1, random_state=2)
Convert the data into compatible data loaders¶
Fortuna helps you convert data and data loaders into a data loader that Fortuna can digest.
[ ]:
from fortuna.data import DataLoader
train_data_loader = DataLoader.from_array_data(
train_data, batch_size=256, shuffle=True, prefetch=True
)
val_data_loader = DataLoader.from_array_data(val_data, batch_size=256, prefetch=True)
test_data_loader = DataLoader.from_array_data(test_data, batch_size=256, prefetch=True)
Define some utilities to plot the estimated uncertainty¶
[ ]:
import matplotlib.pyplot as plt
import numpy as np
from fortuna.data import InputsLoader
from fortuna.prob_model import ProbClassifier
import jax.numpy as jnp
def get_grid_inputs_loader(grid_size: int = 100):
xx = np.linspace(-3, 4, grid_size)
yy = np.linspace(-1.5, 2, grid_size)
grid = np.array([[_xx, _yy] for _xx in xx for _yy in yy])
grid_inputs_loader = InputsLoader.from_array_inputs(grid)
grid = grid.reshape(grid_size, grid_size, 2)
return grid, grid_inputs_loader
def compute_test_modes(prob_model: ProbClassifier, test_data_loader: DataLoader):
test_inputs_loader = test_data_loader.to_inputs_loader()
test_means = prob_model.predictive.mean(inputs_loader=test_inputs_loader)
return prob_model.predictive.mode(
inputs_loader=test_inputs_loader, means=test_means
)
def plot_uncertainty_over_grid(
grid: jnp.ndarray, scores: jnp.ndarray, test_modes: jnp.ndarray, title: str, ax=None
):
scores = scores.reshape(grid.shape[0], grid.shape[1])
if ax is None:
fig, ax = plt.subplots()
ax.set_title(title, fontsize=12)
pcm = ax.imshow(
scores.T,
origin="lower",
extent=(grid[0][0][0], grid[-1][0][0], grid[0][0][1], grid[0][-1][1]),
interpolation="bicubic",
aspect="auto",
)
    # Plot the test data, colored by each point's predicted class.
    ax.scatter(
        test_data[0][:, 0],
        test_data[0][:, 1],
        s=3,
        c=["C0" if i == 1 else "C1" for i in test_modes],
    )
    plt.colorbar(pcm, ax=ax)
Define the deterministic model¶
In this tutorial we will use a deep residual network; please refer to fortuna.model.mlp.DeepResidualNet for more details about the model.
[ ]:
from fortuna.model.mlp import DeepResidualNet
import flax.linen as nn
output_dim = 2
model = DeepResidualNet(
output_dim=output_dim,
activations=(nn.relu, nn.relu, nn.relu, nn.relu, nn.relu, nn.relu),
widths=(128, 128, 128, 128, 128, 128),
dropout_rate=0.1,
)
[ ]:
from fortuna.prob_model import MAPPosteriorApproximator
from fortuna.prob_model import FitConfig, FitMonitor, FitOptimizer
from fortuna.metric.classification import accuracy
prob_model = ProbClassifier(
model=model,
posterior_approximator=MAPPosteriorApproximator(),
output_calibrator=None,
)
status = prob_model.train(
train_data_loader=train_data_loader,
val_data_loader=val_data_loader,
calib_data_loader=val_data_loader,
fit_config=FitConfig(
monitor=FitMonitor(metrics=(accuracy,)),
optimizer=FitOptimizer(n_epochs=100),
),
)
[ ]:
test_modes = compute_test_modes(prob_model, test_data_loader)
grid, grid_inputs_loader = get_grid_inputs_loader(grid_size=100)
grid_entropies = prob_model.predictive.entropy(grid_inputs_loader)
plot_uncertainty_over_grid(
grid=grid,
scores=grid_entropies,
test_modes=test_modes,
title="Predictive uncertainty with MAP",
)
plt.show()
Clearly, the model is overconfident on inputs that are far away from the training data. This behavior is not what one would expect; ideally, the model should be less confident on out-of-distribution inputs.
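To be precise about what is being plotted: the score is the entropy of the predictive distribution, which is zero for a fully confident prediction and maximal for a uniform one. Below is a minimal sketch of this standard definition, purely for illustration (the function predictive_entropy is ours, not Fortuna's internal implementation).
[ ]:
import jax.numpy as jnp

def predictive_entropy(probs: jnp.ndarray) -> jnp.ndarray:
    # probs has shape (n_inputs, n_classes), with rows summing to 1.
    # Returns shape (n_inputs,): 0 for a fully confident prediction,
    # log(n_classes) for a maximally uncertain one.
    eps = 1e-12  # guard against log(0)
    return -jnp.sum(probs * jnp.log(probs + eps), axis=-1)

# A confident prediction has near-zero entropy even when the input is far
# from the training data -- exactly the failure mode visible in the plot.
print(predictive_entropy(jnp.array([[0.999, 0.001], [0.5, 0.5]])))  # ~[0.008, 0.693]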
Fit an OOD classifier to distinguish between in-distribution and out-of-distribution inputs¶
Given the model trained above, we can now use one of the methods offered by Fortuna to obtain improved uncertainty estimates on out-of-distribution inputs. In the example below we will use two methods (the Mahalanobis score is sketched right after the list):
A classifier based on the Mahalanobis distance, introduced by [Lee et al., 2018]
Deep Deterministic Uncertainty (DDU) [Mukhoti et al., 2022].
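Both methods fit a simple density model to the embeddings produced by the network: the approach of [Lee et al., 2018] fits per-class Gaussian means with a shared covariance and scores an input by its minimum squared Mahalanobis distance to a class mean, while DDU scores inputs via the density of a Gaussian mixture fitted to the embeddings. Here is a minimal NumPy sketch of the Mahalanobis score, meant only to illustrate the idea (not Fortuna's implementation):
[ ]:
import numpy as np

def fit_class_gaussians(embeddings: np.ndarray, targets: np.ndarray):
    # Per-class means and a single shared covariance, as in Lee et al., 2018.
    classes = np.unique(targets)
    means = np.stack([embeddings[targets == c].mean(0) for c in classes])
    centered = embeddings - means[np.searchsorted(classes, targets)]
    precision = np.linalg.inv(centered.T @ centered / len(embeddings))
    return means, precision

def mahalanobis_score(x_emb: np.ndarray, means: np.ndarray, precision: np.ndarray):
    # Minimum squared Mahalanobis distance to any class mean; large scores
    # mean the embedding is far from every class, i.e. likely OOD.
    diffs = x_emb[:, None, :] - means[None, :, :]  # (n, n_classes, dim)
    dists = np.einsum("ncd,de,nce->nc", diffs, precision, diffs)
    return dists.min(axis=1)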
In the code block below we define a feature_extractor_subnet, a sub-network of our previously trained model that maps an input vector into an embedding vector. In this example, the feature extractor is our original model (DeepResidualNet) with the output layer removed.
[ ]:
from fortuna.model.mlp import DeepResidualFeatureExtractorSubNet
import jax
feature_extractor_subnet = DeepResidualFeatureExtractorSubNet(
dense=model.dense,
widths=model.widths,
activations=model.activations,
dropout=model.dropout,
dropout_rate=model.dropout_rate,
)
@jax.jit
def _apply(inputs, params, mutable):
variables = {"params": params["model"]["params"]["dfe_subnet"].unfreeze()}
if mutable is not None:
mutable_variables = {
k: v["dfe_subnet"].unfreeze() for k, v in mutable["model"].items()
}
variables.update(mutable_variables)
return feature_extractor_subnet.apply(variables, inputs, train=False, mutable=False)
Let's use the feature extractor to compute the embeddings of the training and OOD inputs.
[ ]:
from typing import Tuple
import tqdm
from fortuna.data.loader.base import BaseDataLoaderABC, BaseInputsLoader
from fortuna.prob_model.posterior.state import PosteriorState
from fortuna.typing import Array
def get_embeddings_and_targets(
state: PosteriorState, train_data_loader: BaseDataLoaderABC
) -> Tuple[Array, Array]:
train_labels = []
train_embeddings = []
for x, y in tqdm.tqdm(train_data_loader, desc="Computing embeddings: "):
train_embeddings.append(
_apply(inputs=x, params=state.params, mutable=state.mutable)
)
train_labels.append(y)
train_embeddings = jnp.concatenate(train_embeddings, 0)
train_labels = jnp.concatenate(train_labels)
return train_embeddings, train_labels
def get_embeddings(state: PosteriorState, inputs_loader: BaseInputsLoader):
return jnp.concatenate(
[
_apply(inputs=x, params=state.params, mutable=state.mutable)
for x in inputs_loader
],
0,
)
state = prob_model.posterior.state.get()
train_embeddings, train_labels = get_embeddings_and_targets(
state=state, train_data_loader=train_data_loader
)
[ ]:
from fortuna.ood_detection import (
MalahanobisOODClassifier,
DeepDeterministicUncertaintyOODClassifier,
)
maha_classifier = MalahanobisOODClassifier(num_classes=2)
maha_classifier.fit(embeddings=train_embeddings, targets=train_labels)
ddu_classifier = DeepDeterministicUncertaintyOODClassifier(num_classes=2)
ddu_classifier.fit(embeddings=train_embeddings, targets=train_labels)
Let's plot the results! For visualization purposes, we threshold the OOD classifier scores using the maximum score obtained from a known in-distribution source (the code below clips the grid scores at twice the maximum validation-set score).
[ ]:
grid, grid_inputs_loader = get_grid_inputs_loader(grid_size=100)
grid_embeddings = get_embeddings(state=state, inputs_loader=grid_inputs_loader)
ind_embeddings = get_embeddings(
state=state, inputs_loader=val_data_loader.to_inputs_loader()
)
ind_maha_scores = maha_classifier.score(embeddings=ind_embeddings)
grid_maha_scores = maha_classifier.score(embeddings=grid_embeddings)
maha_threshold = 2 * ind_maha_scores.max()
grid_maha_scores = jnp.where(
grid_maha_scores < maha_threshold, grid_maha_scores, maha_threshold
)
ind_ddu_scores = ddu_classifier.score(embeddings=ind_embeddings)
grid_ddu_scores = ddu_classifier.score(embeddings=grid_embeddings)
ddu_threshold = 2 * ind_ddu_scores.max()
grid_ddu_scores = jnp.where(
grid_ddu_scores < ddu_threshold, grid_ddu_scores, ddu_threshold
)
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
plot_uncertainty_over_grid(
grid=grid,
scores=grid_maha_scores,
test_modes=test_modes,
title="Mahalanobis OOD scores",
ax=axes[0],
)
plot_uncertainty_over_grid(
grid=grid,
scores=grid_ddu_scores,
test_modes=test_modes,
title="DDU OOD scores",
ax=axes[1],
)
plt.tight_layout()
plt.show()
Both methods mitigate the out-of-distribution overconfidence! However, while the Mahalanobis-based classifier remains overconfident in the region close to and in between the two moons, DDU fixes that as well.
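An intuition for why DDU behaves better in the in-between region: it scores inputs by the (negative log) density of a class-conditional Gaussian mixture with full per-class covariances, rather than by the distance to the nearest class mean alone. A rough sketch of such a score, assuming scipy is available (an illustration of the idea, not Fortuna's implementation):
[ ]:
import numpy as np
from scipy.special import logsumexp
from scipy.stats import multivariate_normal

def ddu_like_score(x_emb: np.ndarray, embeddings: np.ndarray, targets: np.ndarray):
    # Negative log-density of a Gaussian mixture with one component per
    # class, fitted to the training embeddings; high scores suggest OOD.
    classes, counts = np.unique(targets, return_counts=True)
    log_priors = np.log(counts / counts.sum())
    log_probs = np.stack(
        [
            multivariate_normal.logpdf(
                x_emb,
                mean=embeddings[targets == c].mean(0),
                cov=np.cov(embeddings[targets == c].T)
                + 1e-6 * np.eye(x_emb.shape[1]),  # jitter for stability
            )
            for c in classes
        ],
        axis=1,
    )
    return -logsumexp(log_probs + log_priors, axis=1)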
The SNGP model¶
We will now explore a different method that aims to fix the overconfidence on OOD inputs: Spectral-normalized Neural Gaussian Process (SNGP) [Liu et al., 2020].
SNGP has two main characteristics:
Spectral normalization is applied to all of the dense (or convolutional) layers of the deep learning model (sketched right below).
The dense output layer is replaced with a Gaussian process layer.
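As a refresher on the first ingredient: spectral normalization rescales each weight matrix so that its largest singular value, estimated cheaply by power iteration, stays below a chosen bound, which keeps the feature extractor approximately distance-preserving. A minimal NumPy sketch of the idea (Fortuna's WithSpectralNorm applies this to Flax modules; the helper below is only illustrative):
[ ]:
import numpy as np

def spectral_normalize(w: np.ndarray, norm_bound: float = 0.9, u=None, n_iter: int = 1):
    # Power iteration estimates the largest singular value sigma of w; in
    # practice u is carried across training steps, so one iteration suffices.
    if u is None:
        u = np.random.default_rng(0).normal(size=w.shape[0])
    for _ in range(n_iter):
        v = w.T @ u
        v /= np.linalg.norm(v)
        u = w @ v
        u /= np.linalg.norm(u)
    sigma = u @ w @ v  # estimated spectral norm of w
    # Only shrink w when the bound is violated, as in Liu et al., 2020.
    return w * min(1.0, norm_bound / sigma), u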
Let's see how to use SNGP in Fortuna.
To add spectral normalization to a deterministic network, we just need to define a new deep feature extractor that inherits from both the feature extractor used by the deterministic model (DeepResidualFeatureExtractorSubNet in this example) and WithSpectralNorm. Notably, WithSpectralNorm should be left untouched, while the deep feature extractor can be replaced with any custom object:
[ ]:
from fortuna.model.mlp import DeepResidualFeatureExtractorSubNet
from fortuna.model.utils.spectral_norm import WithSpectralNorm
class SNGPDeepFeatureExtractorSubNet(
WithSpectralNorm, DeepResidualFeatureExtractorSubNet
):
pass
Then, we can define our SNGP model by:
replacing the deep feature extractor: from DeepResidualFeatureExtractorSubNet to SNGPDeepFeatureExtractorSubNet;
using SNGPPosteriorApproximator as the posterior_approximator of the ProbModel.
Nothing else is needed; Fortuna takes care of the rest for you!
[ ]:
import jax.numpy as jnp
from fortuna.prob_model.prior import IsotropicGaussianPrior
from fortuna.prob_model import SNGPPosteriorApproximator
output_dim = 2
model = SNGPDeepFeatureExtractorSubNet(
activations=tuple([nn.relu] * 6),
widths=tuple([128] * 6),
dropout_rate=0.1,
spectral_norm_bound=0.9,
)
prob_model = ProbClassifier(
model=model,
prior=IsotropicGaussianPrior(
log_var=jnp.log(1.0 / 1e-4) - jnp.log(TRAIN_DATA_SIZE)
),
posterior_approximator=SNGPPosteriorApproximator(output_dim=output_dim),
output_calibrator=None,
)
The only parameter required when initializing SNGPPosteriorApproximator is output_dim, which should be set to the number of classes in the classification task. Additional hyperparameters can be set to further improve performance; to better understand them, check out [Liu et al., 2020].
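For intuition on the Gaussian process output layer: in [Liu et al., 2020] it is approximated by a linear layer on random Fourier features of the embeddings, with a Laplace approximation of the posterior over the last-layer weights. The sketch below shows these two ingredients in NumPy for a binary output; it is a simplified illustration, not what SNGPPosteriorApproximator does internally.
[ ]:
import numpy as np

def random_fourier_features(h: np.ndarray, n_features: int = 256, seed: int = 0):
    # Map embeddings h of shape (n, dim) to random features whose inner
    # products approximate an RBF kernel.
    rng = np.random.default_rng(seed)
    w = rng.normal(size=(h.shape[1], n_features))
    b = rng.uniform(0.0, 2 * np.pi, size=n_features)
    return np.sqrt(2.0 / n_features) * np.cos(h @ w + b)

def laplace_precision(phi: np.ndarray, probs: np.ndarray, ridge: float = 1.0):
    # Posterior precision of the last-layer weights under a Laplace
    # approximation; probs are the predicted probabilities of class 1.
    weights = probs * (1.0 - probs)  # per-example curvature of the log-likelihood
    return ridge * np.eye(phi.shape[1]) + (phi * weights[:, None]).T @ phi
The predictive variance at a new input is then phi(x)' Sigma phi(x), with Sigma the inverse of this precision; it grows away from the training data, making the model less confident there.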
We can now train the model as usual:
[ ]:
status = prob_model.train(
train_data_loader=train_data_loader,
val_data_loader=val_data_loader,
calib_data_loader=val_data_loader,
fit_config=FitConfig(
monitor=FitMonitor(metrics=(accuracy,)),
optimizer=FitOptimizer(n_epochs=100),
),
)
[ ]:
test_sngp_modes = compute_test_modes(prob_model, test_data_loader)
grid, grid_inputs_loader = get_grid_inputs_loader(grid_size=100)
grid_sngp_entropies = prob_model.predictive.entropy(grid_inputs_loader)
plot_uncertainty_over_grid(
grid=grid,
scores=grid_sngp_entropies,
test_modes=test_sngp_modes,
title="Predictive uncertainty with SNGP",
)
plt.show()
Similarly to the Mahalanobis and DDU methods above, SNGP is also able to effectively fix the overconfidence problem. The uncertainty provided by SNGP resembles the DDU scores obtained above, but it is smoother and less prone to overfitting.
The figure below compares all of the plots obtained above.
[ ]:
fig, axes = plt.subplots(1, 4, figsize=(20, 5))
plot_uncertainty_over_grid(
grid=grid,
scores=grid_entropies,
test_modes=test_modes,
title="Predictive uncertainty with MAP",
ax=axes[0],
)
plot_uncertainty_over_grid(
grid=grid,
scores=grid_maha_scores,
test_modes=test_modes,
title="Mahalanobis OOD scores",
ax=axes[1],
)
plot_uncertainty_over_grid(
grid=grid,
scores=grid_ddu_scores,
test_modes=test_modes,
title="DDU OOD scores",
ax=axes[2],
)
plot_uncertainty_over_grid(
grid=grid,
scores=grid_sngp_entropies,
test_modes=test_sngp_modes,
title="Predictive uncertainty with SNGP",
ax=axes[3],
)
plt.tight_layout()
plt.show()