预测性维护

预测性维护（PdM）是一种基于数据的预防性维护程序。它是一种主动维护策略，利用传感器在操作过程中监测性能和设备状况。PdM方法不断分析数据，以预测最佳维护时间表。如果使用得当，它可以降低维护成本并防止设备灾难性故障。

在本笔记本中，我们将应用NeuralForecast对经典的PHM2008飞机退化数据集进行监督剩余使用寿命（RUL）估计。

大纲
1. 安装软件包
2. 加载PHM2008飞机退化数据集
3. 拟合和预测NeuralForecast
4. 评估预测结果

您可以使用GPU在Google Colab中运行这些实验。

1. 安装包

# %%capture
# !pip 安装 git+https://github.com/Nixtla/neuralforecast.git

# %%capture
# !pip 安装 git+https://github.com/Nixtla/datasetsforecast.git

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'serif'

from neuralforecast.models import NBEATSx, MLP
from neuralforecast import NeuralForecast
#来自neuralforecast.losses.pytorch模块的分布损失、Huber分位数损失和分位数损失
from neuralforecast.losses.pytorch import HuberLoss, MAE

from datasetsforecast.phm2008 import PHM2008

2. 加载PHM2008航空器退化数据集

在这里我们将加载2008年预后与健康管理挑战数据集。该数据集使用商业模块化航空推进系统仿真来重现不同航空器在正常条件下的涡扇发动机退化过程，这些航空器具有不同的磨损和制造起始条件。训练数据集由完整的失效仿真组成，而测试数据集包括失效前的序列。

Y_train_df, Y_test_df = PHM2008.load(directory='./data', group='FD001', clip_rul=False)
Y_train_df

	unique_id	ds	s_2	s_3	s_4	s_7	s_8	s_9	s_11	s_12	s_13	s_14	s_15	s_17	s_20	s_21	y
0	1	1	641.82	1589.70	1400.60	554.36	2388.06	9046.19	47.47	521.66	2388.02	8138.62	8.4195	392	39.06	23.4190	191
1	1	2	642.15	1591.82	1403.14	553.75	2388.04	9044.07	47.49	522.28	2388.07	8131.49	8.4318	392	39.00	23.4236	190
2	1	3	642.35	1587.99	1404.20	554.26	2388.08	9052.94	47.27	522.42	2388.03	8133.23	8.4178	390	38.95	23.3442	189
3	1	4	642.35	1582.79	1401.87	554.45	2388.11	9049.48	47.13	522.86	2388.08	8133.83	8.3682	392	38.88	23.3739	188
4	1	5	642.37	1582.85	1406.22	554.00	2388.06	9055.15	47.28	522.19	2388.04	8133.80	8.4294	393	38.90	23.4044	187
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
20626	100	196	643.49	1597.98	1428.63	551.43	2388.19	9065.52	48.07	519.49	2388.26	8137.60	8.4956	397	38.49	22.9735	4
20627	100	197	643.54	1604.50	1433.58	550.86	2388.23	9065.11	48.04	519.68	2388.22	8136.50	8.5139	395	38.30	23.1594	3
20628	100	198	643.42	1602.46	1428.18	550.94	2388.24	9065.90	48.09	520.01	2388.24	8141.05	8.5646	398	38.44	22.9333	2
20629	100	199	643.23	1605.26	1426.53	550.68	2388.25	9073.72	48.39	519.67	2388.23	8139.29	8.5389	395	38.29	23.0640	1
20630	100	200	643.85	1600.38	1432.14	550.79	2388.26	9061.48	48.20	519.30	2388.26	8137.33	8.5036	396	38.37	23.0522	0

20631 rows × 17 columns

plot_df1 = Y_train_df[Y_train_df['unique_id']==1]
plot_df2 = Y_train_df[Y_train_df['unique_id']==2]
plot_df3 = Y_train_df[Y_train_df['unique_id']==3]

plt.plot(plot_df1.ds, np.minimum(plot_df1.y, 125), color='#2D6B8F', linestyle='--')
plt.plot(plot_df1.ds, plot_df1.y, color='#2D6B8F', label='Engine 1')

plt.plot(plot_df2.ds, np.minimum(plot_df2.y, 125)+1.5, color='#CA6F6A', linestyle='--')
plt.plot(plot_df2.ds, plot_df2.y+1.5, color='#CA6F6A', label='Engine 2')

plt.plot(plot_df3.ds, np.minimum(plot_df3.y, 125)-1.5, color='#D5BC67', linestyle='--')
plt.plot(plot_df3.ds, plot_df3.y-1.5, color='#D5BC67', label='Engine 3')

plt.ylabel('Remaining Useful Life (RUL)', fontsize=15)
plt.xlabel('Time Cycle', fontsize=15)
plt.legend()
plt.grid()

def smooth(s, b = 0.98):
    v = np.zeros(len(s)+1) #v_0 已经是 0。
    bc = np.zeros(len(s)+1)
    for i in range(1, len(v)): #v_t = 0.95
        v[i] = (b * v[i-1] + (1-b) * s[i-1]) 
        bc[i] = 1 - b**i
    sm = v[1:] / bc[1:]
    return sm

unique_id = 1
plot_df = Y_train_df[Y_train_df.unique_id == unique_id].copy()

fig, axes = plt.subplots(2,3, figsize = (8,5))
fig.tight_layout()

j = -1
#, 's_11', 's_12', 's_13', 's_14', 's_15', 's_17', 's_20', 's_21'
for feature in ['s_2', 's_3', 's_4', 's_7', 's_8', 's_9']:
    if ('s' in feature) and ('smoothed' not in feature):
        j += 1
        axes[j // 3, j % 3].plot(plot_df.ds, plot_df[feature], 
                                 c = '#2D6B8F', label = 'original')
        axes[j // 3, j % 3].plot(plot_df.ds, smooth(plot_df[feature].values), 
                                 c = '#CA6F6A', label = 'smoothed')
        #axes[j // 3, j % 3].plot([10,10],[0,1], c = 'black')
        axes[j // 3, j % 3].set_title(feature)
        axes[j // 3, j % 3].grid()
        axes[j // 3, j % 3].legend()
        
plt.suptitle(f'Engine {unique_id} sensor records')
plt.tight_layout()

3. 拟合和预测 NeuralForecast

NeuralForecast 方法能够解决涉及多种变量的回归问题。回归问题涉及基于目标变量 \(y_{t+h}\) 的滞后 \(y_{:t}\)、时间外的外生特征 \(x^{(h)}_{:t}\)、在预测时可用的外生特征 \(x^{(f)}_{:t+h}\) 和静态特征 \(x^{(s)}\) 来进行预测。

剩余使用寿命（RUL）估计的任务将问题简化为单一时间跨度预测 \(h=1\)，其目标是根据外生特征 \(x^{(f)}_{:t+1}\) 和静态特征 \(x^{(s)}\) 来预测 \(y_{t+1}\)。在 RUL 估计任务中，外生特征通常对应于传感器监测信息，而目标变量则表示 RUL 本身。

\[P(y_{t+1}\;|\;x^{(f)}_{:t+1},x^{(s)})\]

Y_train_df, Y_test_df = PHM2008.load(directory='./data', group='FD001', clip_rul=True)

%%capture
futr_exog_list =['s_2', 's_3', 's_4', 's_7', 's_8', 's_9', 's_11',
                 's_12', 's_13', 's_14', 's_15', 's_17', 's_20', 's_21']

model = NBEATSx(h=1, input_size=24,
                loss=HuberLoss(),
                scaler_type='robust',
                stack_types=['identity', 'identity', 'identity'],
                dropout_prob_theta=0.5,
                futr_exog_list=futr_exog_list,
                exclude_insample_y = True,
                max_steps=1000)
nf = NeuralForecast(models=[model], freq='M')

nf.fit(df=Y_train_df)
Y_hat_df = nf.predict(futr_df=Y_test_df).reset_index() # 默认最后一个窗口？

Global seed set to 1

%%capture
filter_test_df = Y_test_df.groupby('unique_id').tail(31).reset_index()
Y_hat_df2 = nf.cross_validation(df=filter_test_df, n_windows=30, fit_models=False)

4. 评估预测结果

在原始的PHM2008数据集中，测试集的真实剩余使用寿命（RUL）值仅在每个引擎的最后一个时间周期提供。我们将筛选预测结果，仅评估最后一个时间周期。

\[RMSE(\mathbf{y}_{T},\hat{\mathbf{y}}_{T}) = \sqrt{\frac{1}{|\mathcal{D}_{test}|} \sum_{i} (y_{i,T}-\hat{y}_{i,T})^{2}}\]

\[R2(\mathbf{y}_{T},\hat{\mathbf{y}}_{T}) = 1- \frac{\sum_{i} (y_{i,T}-\hat{y}_{i,T})^{2}}{\sum_{i} (y_{i,T}-\bar{y}_{i,T})^{2}}\]

from sklearn.metrics import r2_score
from neuralforecast.losses.numpy import rmse

model_name = repr(nf.models[0])
y_last = Y_test_df[['unique_id', 'y']].groupby('unique_id').last().reset_index()
y_hat_last = Y_hat_df[['unique_id', model_name]].groupby('unique_id').last().reset_index()
y_last = y_last['y']
y_hat_last = y_hat_last[model_name]

rmse_eval = rmse(y=y_last, y_hat=y_hat_last)
r2_eval = r2_score(y_true=y_last, y_pred=y_hat_last)

print(f'{model_name} Prognosis Evaluation')
print(f'RMSE:\t {rmse_eval:.3f}')
print(f'R2:\t {r2_eval:.3f}')

NBEATSx Prognosis Evaluation
RMSE:    4.119
R2:  0.989

plt.scatter(y_last, y_hat_last)
plt.xlabel('True RUL', fontsize=15)
plt.ylabel('RUL Prediction', fontsize=15)
plt.grid()

plot_df1 = Y_hat_df2[Y_hat_df2['unique_id']==1]
plot_df2 = Y_hat_df2[Y_hat_df2['unique_id']==2]
plot_df3 = Y_hat_df2[Y_hat_df2['unique_id']==3]

plt.plot(plot_df1.ds, plot_df1['y'], c='#2D6B8F', label='E1 true RUL')
plt.plot(plot_df1.ds, plot_df1[model_name]+1, c='#2D6B8F', linestyle='--', label='E1 predicted RUL')

plt.plot(plot_df1.ds, plot_df2['y'], c='#CA6F6A', label='E2 true RUL')
plt.plot(plot_df1.ds, plot_df2[model_name]+1, c='#CA6F6A', linestyle='--', label='E2 predicted RUL')

plt.plot(plot_df1.ds, plot_df3['y'], c='#D5BC67', label='E3 true RUL')
plt.plot(plot_df1.ds, plot_df3[model_name]+1, c='#D5BC67', linestyle='--', label='E3 predicted RUL')

plt.legend()
plt.grid()

参考文献

R. Keith Mobley (2002). “预测性维护简介”
Saxena, A., Goebel, K., Simon, D.,&Eklund, N. (2008). “飞机发动机失效模拟的损伤传播建模”. 国际预测与健康管理会议.

Give us a ⭐ on Github