注意
转到末尾以下载完整的示例代码。
将发现的 shapelet 与时间序列对齐
此示例说明了如何使用“Learning Shapelets”方法来学习一组能够线性分离时间序列的 shapelet。在此示例中，我们将提取一个 shapelet，用于区分“Trace”数据集中的两个类别。之后，我们展示了如何通过将 shapelet 沿每个时间序列对齐，把时间序列转换为距离。这种对齐的做法是：将较短的 shapelet 在较长的时间序列上逐点滑动，并取其中最小的逐点距离。
有关该方法的更多信息可以在以下网址找到: http://fs.ismll.de/publicspace/LearningShapelets/。
WARNING:absl:`lr` is deprecated in Keras optimizer, please use `learning_rate` or use the legacy optimizer, e.g.,tf.keras.optimizers.legacy.Adam.
# Author: Romain Tavenard
# License: BSD 3 clause
import numpy
import matplotlib.pyplot as plt
from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMinMax
from tslearn.shapelets import LearningShapelets, \
grabocka_params_to_shapelet_size_dict
from tensorflow.keras.optimizers import Adam
# Fix the NumPy random seed so that repeated runs are deterministic
numpy.random.seed(42)

# Load the "Trace" dataset; only the first two returned values (the training
# series and their labels) are kept, the other two are discarded
X_train, y_train, _, _ = CachedDatasets().load_dataset("Trace")

# Keep only the series labelled 1 or 3, i.e. drop classes 2 and 4
mask = numpy.isin(y_train, [1, 3])
X_train, y_train = X_train[mask], y_train[mask]

# Normalize the time series with TimeSeriesScalerMinMax
X_train = TimeSeriesScalerMinMax().fit_transform(X_train)

# Dataset statistics: first two dimensions of X_train and the number of
# distinct labels that remain after filtering
n_ts, ts_sz = X_train.shape[0], X_train.shape[1]
n_classes = len(numpy.unique(y_train))
# We will extract 1 shapelet and align it with a time series. The dict is
# passed as `n_shapelets_per_size`; its values are the shapelet counts
# (they are summed later to get the total number of shapelets).
shapelet_sizes = {20: 1}

# Define the model and fit it using the training data
shp_clf = LearningShapelets(
    n_shapelets_per_size=shapelet_sizes,
    weight_regularizer=0.001,
    # `lr` is deprecated in Keras optimizers (see the warning emitted by the
    # original script); `learning_rate` is the supported keyword.
    optimizer=Adam(learning_rate=0.01),
    max_iter=250,
    verbose=0,
    scale=False,
    random_state=42,
)
shp_clf.fit(X_train, y_train)
# Total number of extracted shapelets (sum of the per-size counts)
n_shapelets = sum(shapelet_sizes.values())

# (Minimal) distance from every time series to every shapelet, and the
# location (index) in each series where that minimal distance was found
distances = shp_clf.transform(X_train)
predicted_locations = shp_clf.locate(X_train)

# Two stacked panels sharing the x axis: the alignment on top, the distance
# profile below
f, ax = plt.subplots(nrows=2, ncols=1, sharex=True)

# Pick the best matched time series, i.e. the one whose summed distance to
# the shapelets is smallest
test_ts_id = numpy.sum(distances, axis=1).argmin()

# The optimizer will often enlarge the shapelet to create a larger gap between
# the distances of both classes, so the shapelet is normalized again before
# plotting.
shap = shp_clf.shapelets_[0]
shap = TimeSeriesScalerMinMax().fit_transform(
    shap.reshape(1, -1, 1)
).flatten()

# Draw the series, then overlay the shapelet starting at its located position
pos = predicted_locations[test_ts_id, 0]
ax[0].plot(X_train[test_ts_id].ravel())
ax[0].plot(pos + numpy.arange(len(shap)), shap, linewidth=2)
ax[0].axvline(pos, color='k', linestyle='--', alpha=0.25)
ax[0].set_title("The aligned extracted shapelet")
# We calculate the distances from the shapelet to the timeseries ourselves:
# slide the shapelet over the selected series and take the Euclidean norm of
# the difference at every start position.
time_series = X_train[test_ts_id].ravel()

# A window of len(shap) samples fits at start positions
# 0 .. len(time_series) - len(shap) inclusive, hence the "+ 1"; without it the
# last possible alignment would never be evaluated.
n_windows = len(time_series) - len(shap) + 1
distances = [
    numpy.linalg.norm(time_series[start:start + len(shap)] - shap)
    for start in range(n_windows)
]

# Plot the distance profile and mark the position of its minimum
ax[1].plot(distances)
ax[1].axvline(numpy.argmin(distances), color='k', linestyle='--', alpha=0.25)
ax[1].set_title('The distances between the time series and the shapelet')

plt.tight_layout()
plt.show()
脚本总运行时间: (0 分钟 6.132 秒)