注意
转到末尾以下载完整的示例代码
早期分类
这个例子展示了早期分类的概念。
早期分类器在 tslearn.early_classification 模块中实现;本例使用文献 [1] 中提出的方法。
[1] A. Dachraoui, A. Bondu & A. Cornuejols. Early classification of time series as a non myopic sequential decision making problem(时间序列的早期分类作为一个非近视的序列决策问题). ECML/PKDD 2015.
# Author: Romain Tavenard
# License: BSD 3 clause
# sphinx_gallery_thumbnail_number = 2
import numpy
import matplotlib.pyplot as plt
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.early_classification import NonMyopicEarlyClassifier
from tslearn.datasets import UCR_UEA_datasets
def plot_partial(time_series, t, y_true=0, y_pred=0, color="k"):
    """Plot one time series, split at the prediction time ``t``.

    Everything is drawn on the current matplotlib axes through the
    ``pyplot`` state machine: the samples up to and including index ``t``
    as a solid line, the remaining samples as a dashed line, and a vertical
    marker at ``x = t`` labelled "Prediction time".

    Parameters
    ----------
    time_series : numpy.ndarray
        The series to draw. Its first axis is used as the time axis and the
        values are flattened with ``ravel()`` before plotting.
    t : int
        Index of the last sample drawn as "observed" (solid line).
    y_true : object, default 0
        Ground-truth label, only used in the title.
    y_pred : object, default 0
        Predicted label, only used in the title.
    color : str, default "k"
        Matplotlib colour used for both line segments and the marker.

    Returns
    -------
    None
    """
    # Observed part: samples 0..t inclusive.
    plt.plot(time_series[:t+1].ravel(), color=color, linewidth=1.5)
    # Unobserved part: explicit x values keep it aligned with the original
    # time axis instead of restarting at 0.
    plt.plot(numpy.arange(t+1, time_series.shape[0]),
             time_series[t+1:].ravel(),
             linestyle="dashed", color=color, linewidth=1.5)
    plt.axvline(x=t, color=color, linewidth=1.5)
    # Label is placed slightly left of the marker and just below the maximum
    # value of the series.
    plt.text(x=t - 20, y=time_series.max() - .25, s="Prediction time")
    plt.title(
        "Sample of class {} predicted as class {}".format(y_true, y_pred)
    )
    plt.xlim(0, time_series.shape[0] - 1)
数据加载和可视化
# Seed NumPy's global random generator before any data handling.
numpy.random.seed(0)
X_train, y_train, X_test, y_test = UCR_UEA_datasets().load_dataset("ECG200")

# Scale time series
X_train = TimeSeriesScalerMeanVariance().fit_transform(X_train)
X_test = TimeSeriesScalerMeanVariance().fit_transform(X_test)

# Extent of the second axis of X_train; reused below as the x-axis range of
# every plot and as the upper bound of the histogram bins.
size = X_train.shape[1]
classes = set(y_train)
n_classes = len(classes)

# One subplot per class, with all training series of that class overlaid.
plt.figure()
for i, cl in enumerate(classes):
    plt.subplot(n_classes, 1, i + 1)
    for ts in X_train[y_train == cl]:
        plt.plot(ts.ravel(), color="orange" if cl > 0 else "blue", alpha=.3)
    plt.xlim(0, size - 1)
plt.suptitle("Training time series")
plt.show()
模型拟合
如下图所示,NonMyopicEarlyClassifier 估计的最佳分类时间因数据而异:不同的时间序列会在不同的时刻被分类。
early_clf = NonMyopicEarlyClassifier(
    n_clusters=3,
    cost_time_parameter=1e-3,
    lamb=1e2,
    random_state=0,
)
early_clf.fit(X_train, y_train)
preds, times = early_clf.predict_class_and_earliness(X_test)

# Two stacked panels, each showing one test sample together with the time at
# which the classifier decided to predict: (index into X_test, line colour).
plt.figure()
for panel, (ts_idx, panel_color) in enumerate([(0, "orange"), (9, "blue")],
                                              start=1):
    plt.subplot(2, 1, panel)
    t = times[ts_idx]
    plot_partial(
        X_test[ts_idx], t, y_test[ts_idx], preds[ts_idx], color=panel_color
    )
plt.tight_layout()
plt.show()
早期性与准确性的权衡
早期性与准确性之间的权衡由 cost_time_parameter 控制:该值越大,等待的代价越高,模型就越倾向于更早做出预测。
plt.figure()
hatches = ["///", "\\\\\\", "*"]
# Refit the same classifier once per time-cost value and overlay the
# resulting distributions of prediction times, one hatch pattern per value.
for hatch, cost_t in zip(hatches, [1e-4, 1e-3, 1e-2]):
    early_clf.set_params(cost_time_parameter=cost_t)
    early_clf.fit(X_train, y_train)
    preds, times = early_clf.predict_class_and_earliness(X_test)
    plt.hist(times,
             alpha=.5, hatch=hatch,
             density=True,
             label="$\\alpha={}$".format(cost_t),
             bins=numpy.arange(0, size, 5))
plt.legend(loc="upper right")
plt.xlim(0, size - 1)
plt.xlabel("Prediction times")
plt.title("Impact of cost_time_parameter ($\\alpha$)")
plt.show()
脚本的总运行时间: (0 分钟 11.767 秒)