%%capture
!pip install datasetsforecast hierarchicalforecast
!pip install git+https://github.com/Nixtla/neuralforecast.git层次预测
本笔记本提供了创建层次预测管道的逐步指南。
在该管道中,我们将使用 NeuralForecast 和 HINT 类来创建拟合、预测和调和预测。
我们将使用 TourismL 数据集,该数据集总结了澳大利亚国家访客调查的结果。
大纲
1. 安装包
2. 加载层次数据集
3. 拟合和预测 HINT
4. 预测评估
您可以使用 Google Colab 通过 GPU 运行这些实验。
1. 安装包
2. 加载层次数据集
该详细的澳大利亚旅游数据集来自由澳大利亚旅游研究管理的国家游客调查,它由1998年至2016年的555个月度系列组成,按地理位置和旅行目的进行组织。自然地理层次包含七个州,进一步划分为27个区域和76个地区。旅行目的类别包括度假、探访朋友和亲戚(VFR)、商务及其他。MinT(Wickramasuriya等,2019)在过去的层次预测研究中使用了该数据集。该数据集可以在MinT对账网页上访问,尽管还有其他来源可用。
| 地理划分 | 每个划分的系列数量 | 按目的划分的系列数量 | 总计 |
|---|---|---|---|
| 澳大利亚 | 1 | 4 | 5 |
| 各州 | 7 | 28 | 35 |
| 区域 | 27 | 108 | 135 |
| 地区 | 76 | 304 | 380 |
| 总计 | 111 | 444 | 555 |
import pandas as pd
import matplotlib.pyplot as plt
from datasetsforecast.hierarchical import HierarchicalData
from hierarchicalforecast.utils import aggregate, HierarchicalPlot
from neuralforecast.utils import augment_calendar_df
def sort_df_hier(Y_df, S):
# NeuralForecast 核心,按字典顺序对 unique_id 进行排序
# 默认情况下,此方法匹配 S_df 和 Y_hat_df 的层级顺序。
Y_df.unique_id = Y_df.unique_id.astype('category')
Y_df.unique_id = Y_df.unique_id.cat.set_categories(S.index)
Y_df = Y_df.sort_values(by=['unique_id', 'ds'])
return Y_df
# 加载分层数据集
Y_df, S_df, tags = HierarchicalData.load('./data', 'TourismLarge')
Y_df['ds'] = pd.to_datetime(Y_df['ds'])
Y_df = sort_df_hier(Y_df, S_df)
Y_df, _ = augment_calendar_df(df=Y_df, freq='M')从数学上讲,层次多元时间序列可以用向量 \(\mathbf{y}_{[a,b],t}\) 表示,该向量由以下聚合约束定义:
\[ \mathbf{y}_{[a,b],t} = \mathbf{S}_{[a,b][b]} \mathbf{y}_{[b],t} \quad \Leftrightarrow \quad \begin{bmatrix}\mathbf{y}_{[a],t} \\ %\hline \mathbf{y}_{[b],t}\end{bmatrix} = \begin{bmatrix} \mathbf{A}_{[a][b]}\\ %\hline \mathbf{I}_{[b][b]} \end{bmatrix} \mathbf{y}_{[b],t} \]
其中 \(\mathbf{y}_{[a],t}\) 是聚合系列,\(\mathbf{y}_{[b],t}\) 是底层系列,\(\mathbf{S}_{[a,b][b]}\) 是层次聚合约束。
# 在此,我们绘制了层次约束矩阵。
hplot = HierarchicalPlot(S=S_df, tags=tags)
hplot.plot_summing_matrix()
# 在此,我们绘制了数据集中最靠前的系列。
# 这相当于澳大利亚每月接待的游客总数
plt.figure(figsize=(10,5))
plt.plot(Y_df[Y_df['unique_id']=='TotalAll']['ds'],
Y_df[Y_df['unique_id']=='TotalAll']['y'], label='target')
plt.plot(Y_df[Y_df['unique_id']=='TotalAll']['ds'],
Y_df[Y_df['unique_id']=='TotalAll']['month']*80000, label='month dummy')
plt.xlabel('Date')
plt.ylabel('Tourist Visits')
plt.legend()
plt.grid()
plt.show()
plt.close()
3. 拟合与预测 HINT
分层预测网络(HINT)将三个组成部分组合成一个易于使用的模型:
1. 最先进的神经网络预测模型。
2. 高效且灵活的多变量概率分布。
3. 内置的协调能力。
import numpy as np
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATSx, NHITS, HINT
from neuralforecast.losses.pytorch import GMM, PMM, DistributionLoss, sCRPS# 训练测试分割
horizon = 12
Y_test_df = Y_df.groupby('unique_id').tail(horizon)
Y_train_df = Y_df.drop(Y_test_df.index)
Y_test_df = Y_test_df.set_index('unique_id')
Y_train_df = Y_train_df.set_index('unique_id')# 地平线与分位数
level = np.arange(0, 100, 2)
qs = [[50-lv/2, 50+lv/2] if lv!=0 else [50] for lv in level]
quantiles = np.sort(np.concatenate(qs)/100)
# HINT := 基础网络 + 分布式系统 + 协调机制
nhits = NHITS(h=horizon,
input_size=24,
loss=GMM(n_components=10, quantiles=quantiles),
hist_exog_list=['month'],
max_steps=2000,
early_stop_patience_steps=10,
val_check_steps=50,
scaler_type='robust',
learning_rate=1e-3,
valid_loss=sCRPS(quantiles=quantiles))
model = HINT(h=horizon, S=S_df.values,
model=nhits, reconciliation='BottomUp')INFO:lightning_fabric.utilities.seed:Global seed set to 1
%%capture
Y_df['y'] = Y_df['y'] * (Y_df['y'] > 0)
nf = NeuralForecast(models=[model], freq='MS')
# Y_hat_df = nf.cross_validation(df=Y_df, val_size=12, n_windows=1)
nf.fit(df=Y_train_df, val_size=12)
Y_hat_df = nf.predict()unique_id = 'TotalAll'
Y_plot_df = Y_df[Y_df.unique_id==unique_id].tail(12*5)
Y_hat_df = Y_hat_df.reset_index()
plot_df = Y_hat_df[Y_hat_df.unique_id==unique_id]
plot_df = Y_plot_df.merge(plot_df, on=['unique_id', 'ds'], how='left')
plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')
plt.plot(plot_df['ds'], plot_df['HINT-median'], c='blue', label='median')
plt.fill_between(x=plot_df['ds'][-12:],
y1=plot_df['HINT-lo-90.0'][-12:].values,
y2=plot_df['HINT-hi-90.0'][-12:].values,
alpha=0.4, label='level 90')
plt.legend()
plt.grid()
plt.plot()
4. 预测评估
为了评估一致的概率预测,我们使用缩放的连续排名概率分数(sCRPS),定义如下:
\[ \mathrm{CRPS}(\hat{F}_{[a,b],\tau},\mathbf{y}_{[a,b],\tau}) = \frac{2}{N_{a}+N_{b}} \sum_{i} \int^{1}_{0} \mathrm{QL}(\hat{F}_{i,\tau}, y_{i,\tau})_{q} dq \]
\[ \mathrm{sCRPS}(\hat{F}_{[a,b\,],\tau},\mathbf{y}_{[a,b\,],\tau}) = \frac{\mathrm{CRPS}(\hat{F}_{[a,b\,],\tau},\mathbf{y}_{[a,b\,],\tau})}{\sum_{i} | y_{i,\tau} |} \]
正如您所看到的,HINT模型在最小调优下高效地实现了最先进的准确性。
from hierarchicalforecast.evaluation import scaled_crps
def _get_hierarchical_scrps(hier_idxs, Y, Yq_hat, quantiles):
# 我们使用从聚合标签中获得的索引。
# 计算各层级层次结构中的缩放CRPS
scrps_list = []
for idxs in hier_idxs:
y = Y[idxs, :]
yq_hat = Yq_hat[idxs, :, :]
scrps = scaled_crps(y, yq_hat, quantiles)
scrps_list.append(scrps)
return scrps_list
hier_idxs = [np.arange(len(S_df))] +\
[S_df.index.get_indexer(tags[level]) for level in list(tags.keys())]%%capture
n_series = len(S_df)
n_quantiles = len(quantiles)
# Bootstrap预测
n_samples = 5
Y_hat_df_list = [nf.predict() for _ in range(n_samples)]
# 解析 y_test 和 y_rec
# 仅保留Y_hat_df中的分位数列
# 移除均值和中位数默认输出
model_name = type(model).__name__
quantile_columns = [model_name + n for n in nhits.loss.output_names]
quantile_columns.remove(model_name)
Yq_hat = []
for sample_idx in range(n_samples):
Y_hat = Y_hat_df_list[sample_idx][quantile_columns].values
Yq_hat.append(Y_hat.reshape(1, n_series, horizon, n_quantiles))
Yq_hat = np.concatenate(Yq_hat, axis=0)
Y_test = Y_test_df['y'].values.reshape(n_series, horizon)print('Y_test.shape [n_series, horizon]', Y_test.shape)
print('Yq_hat.shape [n_samples, n_series, horizon, n_quantiles]', Yq_hat.shape)
# 计算自举的sCRPS
scrps_hint = [_get_hierarchical_scrps(hier_idxs, Y_test, Yq_hat[sample_idx], quantiles) \
for sample_idx in range(n_samples)]
crps_mean = np.mean(np.array(scrps_hint), axis=0)
crps_std = np.std(np.array(scrps_hint), axis=0)
scrps_hint = [f'{crps_mean[level_idx]:.4f}±{(1.96 * crps_std[level_idx]):.4f}' \
for level_idx in range(len(crps_mean))]
# Add reported baselines' performance
levels = ['Overall', 'Country', 'State', 'Zone', 'Region',
'Country/Purpose', 'State/Purpose', 'Zone/Purpose', 'Region/Purpose']
scrps_dpmn = ["0.1249±0.0020","0.0431±0.0042","0.0637±0.0032","0.1084±0.0033",
"0.1554±0.0025","0.0700±0.0038","0.1070±0.0023","0.1887±0.0032","0.2629±0.0034"]
scrps_hiere2e = ["0.1472±0.0029","0.0842±0.0051","0.1012±0.0029","0.1317±0.0022",
"0.1705±0.0023","0.0995±0.0061","0.1336±0.0042","0.1955±0.0025","0.2615±0.0016"]
scrps_arima_mintrace = ["0.1313±0.0009","0.0471±0.0018","0.0723±0.0011","0.1143±0.0007",
"0.1591±0.0006","0.0723±0.0014","0.1243±0.0014","0.1919±0.0008","0.2694±0.0006"]
scrps_arima_bu = ["0.1375±0.0013","0.0622±0.0026","0.0820±0.0019","0.1207±0.0010",
"0.1646±0.0007","0.0788±0.0018","0.1268±0.0017","0.1949±0.0010","0.2698±0.0008"]
scrps_arima = ["0.1416","0.0263","0.0904","0.1389","0.1878","0.0770","0.1270","0.2022","0.2834"]
scrps_results = dict(Levels=levels,
HINT=scrps_hint,
DPMN=scrps_dpmn,
HierE2E=scrps_hiere2e,
ARIMA_MinTrace_B=scrps_arima_mintrace,
ARIMA_BottomUp_B=scrps_arima_bu,
ARIMA=scrps_arima)
scrps_results = pd.DataFrame(scrps_results)
scrps_resultsY_test.shape [n_series, horizon] (555, 12)
Yq_hat.shape [n_samples, n_series, horizon, n_quantiles] (5, 555, 12, 99)
| Levels | HINT | DPMN | HierE2E | ARIMA_MinTrace_B | ARIMA_BottomUp_B | ARIMA | |
|---|---|---|---|---|---|---|---|
| 0 | Overall | 0.1178±0.0002 | 0.1249±0.0020 | 0.1472±0.0029 | 0.1313±0.0009 | 0.1375±0.0013 | 0.1416 |
| 1 | Country | 0.0288±0.0007 | 0.0431±0.0042 | 0.0842±0.0051 | 0.0471±0.0018 | 0.0622±0.0026 | 0.0263 |
| 2 | State | 0.0593±0.0004 | 0.0637±0.0032 | 0.1012±0.0029 | 0.0723±0.0011 | 0.0820±0.0019 | 0.0904 |
| 3 | Zone | 0.1023±0.0003 | 0.1084±0.0033 | 0.1317±0.0022 | 0.1143±0.0007 | 0.1207±0.0010 | 0.1389 |
| 4 | Region | 0.1451±0.0004 | 0.1554±0.0025 | 0.1705±0.0023 | 0.1591±0.0006 | 0.1646±0.0007 | 0.1878 |
| 5 | Country/Purpose | 0.0752±0.0006 | 0.0700±0.0038 | 0.0995±0.0061 | 0.0723±0.0014 | 0.0788±0.0018 | 0.0770 |
| 6 | State/Purpose | 0.1109±0.0001 | 0.1070±0.0023 | 0.1336±0.0042 | 0.1243±0.0014 | 0.1268±0.0017 | 0.1270 |
| 7 | Zone/Purpose | 0.1773±0.0003 | 0.1887±0.0032 | 0.1955±0.0025 | 0.1919±0.0008 | 0.1949±0.0010 | 0.2022 |
| 8 | Region/Purpose | 0.2438±0.0002 | 0.2629±0.0034 | 0.2615±0.0016 | 0.2694±0.0006 | 0.2698±0.0008 | 0.2834 |
参考文献
- Kin G. Olivares, David Luo, Cristian Challu, Stefania La Vattiata, Max Mergenthaler, Artur Dubrawski (2023). “HINT: 层次混合网络用于一致的概率预测”. 国际机器学习会议 (ICML). 结构化概率推理与生成建模研讨会. 可在 https://arxiv.org/abs/2305.07089 获取。
- Kin G. Olivares, O. Nganba Meetei, Ruijun Ma, Rohan Reddy, Mengfei Cao, Lee Dicker (2023).”使用深度泊松混合进行概率层次预测”. 国际预测期刊, 接受论文. URL https://arxiv.org/pdf/2110.13179.pdf.
- Kin G. Olivares, Federico Garza, David Luo, Cristian Challu, Max Mergenthaler, Souhaib Ben Taieb, Shanika Wickramasuriya, 和 Artur Dubrawski (2023). “HierarchicalForecast: 层次预测的参考框架”. 机器学习研究杂志, 提交中. URL https://arxiv.org/abs/2207.03517
Give us a ⭐ on Github