TSMixerx


Time-Series Mixer exogenous (TSMixerx) is an MLP-based multivariate time-series forecasting model with the capability to handle additional exogenous inputs. TSMixerx jointly learns temporal and cross-sectional representations of the time series by repeatedly combining time- and feature information using stacked mixing layers. A mixing layer consists of a sequential time- and feature Multi Layer Perceptron (MLP).

References
- [Chen, Si-An, Chun-Liang Li, Nate Yoder, Sercan O. Arik, and Tomas Pfister (2023). "TSMixer: An All-MLP Architecture for Time Series Forecasting."](http://arxiv.org/abs/2303.06053)

Figure 2. TSMixerx for multivariate time series forecasting.
from fastcore.test import test_eq
from nbdev.showdoc import show_doc
import torch
import torch.nn as nn
import torch.nn.functional as F

from neuralforecast.losses.pytorch import MAE
from neuralforecast.common._base_multivariate import BaseMultivariate

1. Auxiliary Functions

1.1 Mixing Layers

A mixing layer consists of a sequential time- and feature Multi Layer Perceptron (MLP).

class TemporalMixing(nn.Module):
    """ 
    TemporalMixing
    """      
    def __init__(self, num_features, h, dropout):
        super().__init__()
        self.temporal_norm = nn.LayerNorm(normalized_shape=(h, num_features))
        self.temporal_lin = nn.Linear(h, h)
        self.temporal_drop = nn.Dropout(dropout)

    def forward(self, input):
        x = input.permute(0, 2, 1)                                      # [B, h, C] -> [B, C, h]
        x = F.relu(self.temporal_lin(x))                                # [B, C, h] -> [B, C, h]
        x = x.permute(0, 2, 1)                                          # [B, C, h] -> [B, h, C]
        x = self.temporal_drop(x)                                       # [B, h, C] -> [B, h, C]

        return self.temporal_norm(x + input)
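
A minimal shape check, not part of the original notebook: the temporal MLP mixes information along the time axis and preserves the [B, h, C] shape (the batch size, horizon, and feature count below are hypothetical values).

temporal = TemporalMixing(num_features=8, h=12, dropout=0.1)  # hypothetical dimensions
test_eq(temporal(torch.randn(4, 12, 8)).shape, (4, 12, 8))    # [B, h, C] -> [B, h, C]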

class FeatureMixing(nn.Module):
    """ 
    FeatureMixing
    """       
    def __init__(self, in_features, out_features, h, dropout, ff_dim):
        super().__init__()
        self.feature_lin_1 = nn.Linear(in_features=in_features, 
                                       out_features=ff_dim)
        self.feature_lin_2 = nn.Linear(in_features=ff_dim, 
                                       out_features=out_features)
        self.feature_drop_1 = nn.Dropout(p=dropout)
        self.feature_drop_2 = nn.Dropout(p=dropout)
        self.linear_project_residual = False
        if in_features != out_features:
            self.project_residual = nn.Linear(in_features = in_features,
                                        out_features = out_features)
            self.linear_project_residual = True

        self.feature_norm = nn.LayerNorm(normalized_shape=(h, out_features))

    def forward(self, input):
        x = F.relu(self.feature_lin_1(input))                           # [B, h, C_in] -> [B, h, ff_dim]
        x = self.feature_drop_1(x)                                      # [B, h, ff_dim] -> [B, h, ff_dim]
        x = self.feature_lin_2(x)                                       # [B, h, ff_dim] -> [B, h, C_out]
        x = self.feature_drop_2(x)                                      # [B, h, C_out] -> [B, h, C_out]
        if self.linear_project_residual:
            input = self.project_residual(input)                        # [B, h, C_in] -> [B, h, C_out]

        return self.feature_norm(x + input)
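
A similar sketch (not in the original notebook, hypothetical dimensions): when in_features differs from out_features, FeatureMixing routes the residual through a linear projection so the skip connection still matches.

feature = FeatureMixing(in_features=8, out_features=16, h=12, dropout=0.1, ff_dim=32)  # hypothetical dimensions
test_eq(feature(torch.randn(4, 12, 8)).shape, (4, 12, 16))                             # [B, h, C_in] -> [B, h, C_out]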

class MixingLayer(nn.Module):
    """ 
    MixingLayer
    """      
    def __init__(self, in_features, out_features, h, dropout, ff_dim):
        super().__init__()
        # The mixing layer consists of a temporal mixer and a feature mixer.
        self.temporal_mixer = TemporalMixing(num_features=in_features, 
                                             h=h, 
                                             dropout=dropout)
        self.feature_mixer = FeatureMixing(in_features=in_features, 
                                           out_features=out_features, 
                                           h=h, 
                                           dropout=dropout, 
                                           ff_dim=ff_dim)

    def forward(self, input):
        x = self.temporal_mixer(input)                                  # [B, h, C_in] -> [B, h, C_in]
        x = self.feature_mixer(x)                                       # [B, h, C_in] -> [B, h, C_out]
        return x
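
A hedged end-to-end check of a full mixing layer (not in the original notebook, hypothetical dimensions): temporal mixing keeps the shape, feature mixing changes the channel count.

mixing = MixingLayer(in_features=8, out_features=16, h=12, dropout=0.1, ff_dim=32)  # hypothetical dimensions
test_eq(mixing(torch.randn(4, 12, 8)).shape, (4, 12, 16))                           # [B, h, C_in] -> [B, h, C_out]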
    
class MixingLayerWithStaticExogenous(nn.Module):
    """ 
    MixingLayerWithStaticExogenous
    """      
    def __init__(self, h, dropout, ff_dim, stat_input_size):
        super().__init__()
        # Feature mixer for the static exogenous variables
        self.feature_mixer_stat = FeatureMixing(in_features=stat_input_size, 
                                                out_features=ff_dim, 
                                                h=h, 
                                                dropout=dropout, 
                                                ff_dim=ff_dim)
        # The mixing layer consists of a temporal mixer and a feature mixer.
        self.temporal_mixer = TemporalMixing(num_features=2 * ff_dim, 
                                             h=h, 
                                             dropout=dropout)
        self.feature_mixer = FeatureMixing(in_features=2 * ff_dim, 
                                           out_features=ff_dim, 
                                           h=h, 
                                           dropout=dropout, 
                                           ff_dim=ff_dim)

    def forward(self, inputs):
        input, stat_exog = inputs
        x_stat = self.feature_mixer_stat(stat_exog)                     # [B, h, S] -> [B, h, ff_dim]
        x = torch.cat((input, x_stat), dim=2)                           # [B, h, ff_dim] + [B, h, ff_dim] -> [B, h, 2 * ff_dim]
        x = self.temporal_mixer(x)                                      # [B, h, 2 * ff_dim] -> [B, h, 2 * ff_dim]
        x = self.feature_mixer(x)                                       # [B, h, 2 * ff_dim] -> [B, h, ff_dim]
        return (x, stat_exog)
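
A small sanity check (not in the original notebook, hypothetical dimensions): the layer takes and returns a (hidden, static) tuple, so nn.Sequential can thread the static exogenous features through every block unchanged.

mixing_stat = MixingLayerWithStaticExogenous(h=12, dropout=0.1, ff_dim=16, stat_input_size=3)  # hypothetical dimensions
out, stat = mixing_stat((torch.randn(4, 12, 16), torch.randn(4, 12, 3)))
test_eq(out.shape, (4, 12, 16))   # [B, h, ff_dim]
test_eq(stat.shape, (4, 12, 3))   # static exogenous passed through unchanged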

1.2 Reversible Instance Normalization

A reversible instance normalization layer, based on this reference implementation.

class ReversibleInstanceNorm1d(nn.Module):
    def __init__(self, n_series, eps=1e-5):
        super().__init__()
        self.weight = nn.Parameter(torch.ones((1, 1, 1, n_series)))
        self.bias = nn.Parameter(torch.zeros((1, 1, 1, n_series)))
        self.eps = eps

    def forward(self, x):
        # Batch statistics
        self.batch_mean = torch.mean(x, axis=2, keepdim=True).detach()
        self.batch_std = torch.sqrt(torch.var(x, axis=2, keepdim=True, unbiased=False) + self.eps).detach()
        
        # Instance normalization
        x = x - self.batch_mean
        x = x / self.batch_std
        x = x * self.weight
        x = x + self.bias
        
        return x

    def reverse(self, x):
        # Reverse the normalization
        x = x - self.bias
        x = x / self.weight       
        x = x * self.batch_std
        x = x + self.batch_mean       

        return x
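
A quick round-trip check (not in the original notebook, hypothetical shapes): with the default weight of ones and bias of zeros, reverse undoes forward up to floating-point error.

rin = ReversibleInstanceNorm1d(n_series=3)   # hypothetical n_series
x = torch.randn(4, 1, 24, 3)                 # [B, 1, L, N]
assert torch.allclose(rin.reverse(rin(x)), x, atol=1e-5)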

2. Model

class TSMixerx(BaseMultivariate):
    """ TSMixerx

    Time-Series Mixer exogenous (`TSMixerx`) is an MLP-based multivariate time-series forecasting model, with capability for additional exogenous inputs. `TSMixerx` jointly learns temporal and cross-sectional representations of the time-series by repeatedly combining time- and feature information using stacked mixing layers. A mixing layer consists of a sequential time- and feature Multi Layer Perceptron (`MLP`).

    **Parameters:**<br>
    `h`: int, forecast horizon.<br>
    `input_size`: int, considered autoregressive inputs (lags), y=[1,2,3,4] input_size=2 -> lags=[1,2].<br>
    `n_series`: int, number of time-series.<br>
    `futr_exog_list`: str list, future exogenous columns.<br>
    `hist_exog_list`: str list, historic exogenous columns.<br>
    `stat_exog_list`: str list, static exogenous columns.<br>
    `n_block`: int=2, number of mixing layers in the model.<br>
    `ff_dim`: int=64, number of units for the second feed-forward layer in the feature MLP.<br>
    `dropout`: float=0.0, dropout rate between (0, 1).<br>
    `revin`: bool=True, if True uses Reversible Instance Normalization on `insample_y` and applies it to the outputs.<br>    
    `loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).<br>
    `valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).<br>
    `max_steps`: int=1000, maximum number of training steps.<br>
    `learning_rate`: float=1e-3, Learning rate between (0, 1).<br>
    `num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.<br>
    `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.<br>
    `val_check_steps`: int=100, Number of training steps between every validation loss check.<br>
    `batch_size`: int=32, number of different series in each batch.<br>
    `step_size`: int=1, step size between each window of temporal data.<br>
    `scaler_type`: str='identity', type of scaler for temporal inputs normalization, see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>
    `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>
    `num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.<br>
    `drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.<br>
    `alias`: str, optional, custom name of the model.<br>
    `optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).<br>
    `optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.<br>
    `lr_scheduler`: Subclass of 'torch.optim.lr_scheduler.LRScheduler', optional, user specified lr_scheduler instead of the default choice (StepLR).<br>
    `lr_scheduler_kwargs`: dict, optional, list of parameters used by the user specified `lr_scheduler`.<br>    
    `**trainer_kwargs`: keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).<br>    

    **References:**<br>
    - [Chen, Si-An, Chun-Liang Li, Nate Yoder, Sercan O. Arik, and Tomas Pfister (2023). "TSMixer: An All-MLP Architecture for Time Series Forecasting."](http://arxiv.org/abs/2303.06053)

    """
    # Class attributes
    SAMPLING_TYPE = 'multivariate'
    EXOGENOUS_FUTR = True
    EXOGENOUS_HIST = True
    EXOGENOUS_STAT = True

    def __init__(self,
                 h,
                 input_size,
                 n_series,
                 futr_exog_list = None,
                 hist_exog_list = None,
                 stat_exog_list = None,
                 n_block = 2,
                 ff_dim = 64,
                 dropout = 0.0,
                 revin = True,
                 loss = MAE(),
                 valid_loss = None,
                 max_steps: int = 1000,
                 learning_rate: float = 1e-3,
                 num_lr_decays: int = -1,
                 early_stop_patience_steps: int =-1,
                 val_check_steps: int = 100,
                 batch_size: int = 32,
                 step_size: int = 1,
                 scaler_type: str = 'identity',
                 random_seed: int = 1,
                 num_workers_loader: int = 0,
                 drop_last_loader: bool = False,
                 optimizer = None,
                 optimizer_kwargs = None,
                 lr_scheduler = None,
                 lr_scheduler_kwargs = None,
                 **trainer_kwargs):

        # Inherit BaseMultivariate class
        super(TSMixerx, self).__init__(h=h,
                                    input_size=input_size,
                                    n_series=n_series,
                                    futr_exog_list=futr_exog_list,
                                    hist_exog_list=hist_exog_list,
                                    stat_exog_list=stat_exog_list,
                                    loss=loss,
                                    valid_loss=valid_loss,
                                    max_steps=max_steps,
                                    learning_rate=learning_rate,
                                    num_lr_decays=num_lr_decays,
                                    early_stop_patience_steps=early_stop_patience_steps,
                                    val_check_steps=val_check_steps,
                                    batch_size=batch_size,
                                    step_size=step_size,
                                    scaler_type=scaler_type,
                                    random_seed=random_seed,
                                    num_workers_loader=num_workers_loader,
                                    drop_last_loader=drop_last_loader,
                                    optimizer=optimizer,
                                    optimizer_kwargs=optimizer_kwargs,
                                    lr_scheduler=lr_scheduler,
                                    lr_scheduler_kwargs=lr_scheduler_kwargs,
                                    **trainer_kwargs)
        # Reversible InstanceNormalization layer
        self.revin = revin
        if self.revin:
            self.norm = ReversibleInstanceNorm1d(n_series = n_series)

        # Forecast horizon
        self.h = h

        # Temporal projection and feature mixing of historical variables
        self.temporal_projection = nn.Linear(in_features=input_size, 
                                            out_features=h)

        self.feature_mixer_hist = FeatureMixing(in_features=n_series * (1 + self.hist_exog_size + self.futr_exog_size),
                                                out_features=ff_dim,
                                                h=h, 
                                                dropout=dropout, 
                                                ff_dim=ff_dim)
        first_mixing_ff_dim_multiplier = 1

        # Feature mixing of future variables
        if self.futr_exog_size > 0:
            self.feature_mixer_futr = FeatureMixing(in_features = n_series * self.futr_exog_size,
                                                    out_features=ff_dim,
                                                    h=h,
                                                    dropout=dropout,
                                                    ff_dim=ff_dim)
            first_mixing_ff_dim_multiplier += 1

        # Feature mixing of static variables
        if self.stat_exog_size > 0:
            self.feature_mixer_stat = FeatureMixing(in_features=self.stat_exog_size * n_series,
                                                    out_features=ff_dim,
                                                    h=h,
                                                    dropout=dropout,
                                                    ff_dim=ff_dim)            
            first_mixing_ff_dim_multiplier += 1

        # First mixing layer
        self.first_mixing = MixingLayer(in_features = first_mixing_ff_dim_multiplier * ff_dim,
                                        out_features=ff_dim,
                                        h=h,
                                        dropout=dropout,
                                        ff_dim=ff_dim)

        # Mixing layer block
        if self.stat_exog_size > 0:
            mixing_layers = [MixingLayerWithStaticExogenous(h=h,
                                                            dropout=dropout,
                                                            ff_dim=ff_dim,
                                                            stat_input_size=self.stat_exog_size * n_series)
                             for _ in range(n_block)]
        else:
            mixing_layers = [MixingLayer(in_features=ff_dim,
                                         out_features=ff_dim,
                                         h=h,
                                         dropout=dropout,
                                         ff_dim=ff_dim)
                             for _ in range(n_block)]

        self.mixing_block = nn.Sequential(*mixing_layers)

        # Linear output with Loss dependent dimensions
        self.out = nn.Linear(in_features=ff_dim, 
                             out_features=self.loss.outputsize_multiplier * n_series)


    def forward(self, windows_batch):
        # Parse batch
        x             = windows_batch['insample_y']             #   [batch_size (B), input_size (L), n_series (N)]
        hist_exog     = windows_batch['hist_exog']              #   [B, hist_exog_size (X), L, N]
        futr_exog     = windows_batch['futr_exog']              #   [B, futr_exog_size (F), L + h, N]
        stat_exog     = windows_batch['stat_exog']              # [N, stat_exog_size (S)]
        batch_size, input_size = x.shape[:2]

        # Add a channel dimension to x
        x = x.unsqueeze(1)                                      # [B, L, N] -> [B, 1, L, N]

        # Apply RevIN to x
        if self.revin:
            x = self.norm(x)                                    # [B, 1, L, N] -> [B, 1, L, N]
        
        # Concatenate x with historical exogenous variables
        if self.hist_exog_size > 0:
            x = torch.cat((x, hist_exog), dim=1)                # [B, 1, L, N] + [B, X, L, N] -> [B, 1 + X, L, N]

        # Concatenate x with future exogenous variables of the input sequence
        if self.futr_exog_size > 0:
            futr_exog_hist = futr_exog[:, :, :input_size]       # [B, F, L + h, N] -> [B, F, L, N]
            x = torch.cat((x, futr_exog_hist), dim=1)           # [B, 1 + X, L, N] + [B, F, L, N] -> [B, 1 + X + F, L, N]
            
        # Temporal projection and feature mixing of x
        x = x.permute(0, 1, 3, 2)                               # [B, 1 + X + F, L, N] -> [B, 1 + X + F, N, L]
        x = self.temporal_projection(x)                         # [B, 1 + X + F, N, L] -> [B, 1 + X + F, N, h]
        x = x.permute(0, 3, 1, 2)                               # [B, 1 + X + F, N, h] -> [B, h, 1 + X + F, N]
        x = x.reshape(batch_size, self.h, -1)                   # [B, h, 1 + X + F, N] -> [B, h, (1 + X + F) * N]
        x = self.feature_mixer_hist(x)                          # [B, h, (1 + X + F) * N] -> [B, h, ff_dim] 

        # Concatenate x with future exogenous variables of the forecast horizon
        if self.futr_exog_size > 0:
            x_futr = futr_exog[:, :, input_size:]               # [B, F, L + h, N] -> [B, F, h, N] 
            x_futr = x_futr.permute(0, 2, 1, 3)                 # [B, F, h, N] -> [B, h, F, N] 
            x_futr = x_futr.reshape(batch_size, 
                                    self.h, -1)                 # [B, h, F, N] -> [B, h, F * N]
            x_futr = self.feature_mixer_futr(x_futr)            # [B, h, F * N] -> [B, h, ff_dim] 
            x = torch.cat((x, x_futr), dim=2)                   # [B, h, ff_dim] + [B, h, ff_dim] -> [B, h, 2 * ff_dim]

        # Concatenate x with static exogenous variables
        if self.stat_exog_size > 0:
            stat_exog = stat_exog.reshape(-1)                   # [N, S] -> [N * S]
            stat_exog = stat_exog.unsqueeze(0)\
                                 .unsqueeze(1)\
                                 .repeat(batch_size, 
                                         self.h, 
                                         1)                     # [N * S] -> [B, h, N * S]
            x_stat = self.feature_mixer_stat(stat_exog)         # [B, h, N * S] -> [B, h, ff_dim] 
            x = torch.cat((x, x_stat), dim=2)                   # [B, h, 2 * ff_dim] + [B, h, ff_dim] -> [B, h, 3 * ff_dim] 

        # First mixing layer
        x = self.first_mixing(x)                                # [B, h, 3 * ff_dim] -> [B, h, ff_dim] 

        # Block of mixing layers
        if self.stat_exog_size > 0:
            x, _ = self.mixing_block((x, stat_exog))            # [B, h, ff_dim], [B, h, N * S] -> [B, h, ff_dim]  
        else:
            x = self.mixing_block(x)                            # [B, h, ff_dim] -> [B, h, ff_dim] 
      
        # Fully connected output layer
        x = self.out(x)                                         # [B, h, ff_dim] -> [B, h, N * n_outputs]
        
        # Reverse the instance normalization on the output
        if self.revin:
            x = x.reshape(batch_size, 
                          self.h, 
                          self.loss.outputsize_multiplier,
                          -1)                                   # [B, h, N * n_outputs] -> [B, h, n_outputs, N]
            x = self.norm.reverse(x)
            x = x.reshape(batch_size, self.h, -1)               # [B, h, n_outputs, N] -> [B, h, n_outputs * N]

        # Map to loss domain
        forecast = self.loss.domain_map(x)

        # domain_map may have squeezed the last dimension in case n_series == 1.
        # Note that this fails in case of a tuple loss, but multivariate models do not support tuple losses yet.
        if forecast.ndim == 2:
            return forecast.unsqueeze(-1)
        else:
            return forecast
show_doc(TSMixerx)
show_doc(TSMixerx.fit, name='TSMixerx.fit')
show_doc(TSMixerx.predict, name='TSMixerx.predict')
import logging
import warnings
import pandas as pd

from neuralforecast import NeuralForecast
from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic, generate_series
from neuralforecast.losses.pytorch import MAE, MSE, RMSE, MAPE, SMAPE, MASE, relMSE, QuantileLoss, MQLoss, DistributionLoss, PMM, GMM, NBMM, HuberLoss, TukeyLoss, HuberQLoss, HuberMQLoss
# Test losses
logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)
warnings.filterwarnings("ignore")

Y_train_df = AirPassengersPanel[AirPassengersPanel.ds<AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 132 train
Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test

AirPassengersStatic_single = AirPassengersStatic[AirPassengersStatic["unique_id"] == 'Airline1']
Y_train_df_single = Y_train_df[Y_train_df["unique_id"] == 'Airline1']
Y_test_df_single = Y_test_df[Y_test_df["unique_id"] == 'Airline1']

losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]
valid_losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]

for loss, valid_loss in zip(losses, valid_losses):
    try:
        model = TSMixerx(h=12,
                        input_size=24,
                        n_series=2,
                        stat_exog_list=['airline1'],
                        futr_exog_list=['trend'],
                        n_block=4,
                        ff_dim=4,
                        revin=True,
                        scaler_type='standard',
                        max_steps=2,
                        early_stop_patience_steps=-1,
                        val_check_steps=5,
                        learning_rate=1e-3,
                        loss=loss,
                        valid_loss=valid_loss,
                        batch_size=32
                        )

        fcst = NeuralForecast(models=[model], freq='M')
        fcst.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)
        forecasts = fcst.predict(futr_df=Y_test_df)
    except Exception as e:
        assert str(e) == f"{loss} is not supported in a Multivariate model."


# Test n_series = 1
model = TSMixerx(h=12,
                input_size=24,
                n_series=1,
                stat_exog_list=['airline1'],
                futr_exog_list=['trend'],
                n_block=4,
                ff_dim=4,
                revin=True,
                scaler_type='standard',
                max_steps=2,
                early_stop_patience_steps=-1,
                val_check_steps=5,
                learning_rate=1e-3,
                loss=MAE(),
                valid_loss=MAE(),
                batch_size=32
                )
fcst = NeuralForecast(models=[model], freq='M')
fcst.fit(df=Y_train_df_single, static_df=AirPassengersStatic_single, val_size=12)
forecasts = fcst.predict(futr_df=Y_test_df_single) 

# Test n_series > 1024
# See issue: https://github.com/Nixtla/neuralforecast/issues/948
n_series = 1111
Y_df, S_df = generate_series(n_series=n_series, n_temporal_features=2, n_static_features=2)

model = TSMixerx(
                h=12,
                input_size=24,
                n_series=n_series,
                stat_exog_list=['static_0', 'static_1'],
                hist_exog_list=["temporal_0", "temporal_1"],
                n_block=4,
                ff_dim=3,
                revin=True,
                scaler_type="standard",
                max_steps=5,
                early_stop_patience_steps=-1,
                val_check_steps=5,
                learning_rate=1e-3,
                loss=MAE(),
                valid_loss=MAE(),
                batch_size=32,
)

fcst = NeuralForecast(models=[model], freq="D")
fcst.fit(df=Y_df, static_df=S_df, val_size=12)
forecasts = fcst.predict()

3. Usage Example

Train the model and use the predict method to forecast future values.

import pandas as pd
import matplotlib.pyplot as plt

from neuralforecast import NeuralForecast
from neuralforecast.models import TSMixerx
from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic
from neuralforecast.losses.pytorch import MAE

Y_train_df = AirPassengersPanel[AirPassengersPanel.ds<AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 132 train
Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test

model = TSMixerx(h=12,
                input_size=24,
                n_series=2,
                stat_exog_list=['airline1'],
                futr_exog_list=['trend'],
                n_block=4,
                ff_dim=4,
                revin=True,
                scaler_type='standard',
                max_steps=500,
                early_stop_patience_steps=-1,
                val_check_steps=5,
                learning_rate=1e-3,
                loss=MAE(),
                valid_loss=MAE(),
                batch_size=32
                )

fcst = NeuralForecast(models=[model], freq='M')
fcst.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)
forecasts = fcst.predict(futr_df=Y_test_df)

# Plot predictions
fig, ax = plt.subplots(1, 1, figsize = (20, 7))
Y_hat_df = forecasts.reset_index(drop=False).drop(columns=['unique_id','ds'])
plot_df = pd.concat([Y_test_df, Y_hat_df], axis=1)
plot_df = pd.concat([Y_train_df, plot_df])

plot_df = plot_df[plot_df.unique_id=='Airline1'].drop('unique_id', axis=1)
plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')
plt.plot(plot_df['ds'], plot_df['TSMixerx'], c='blue', label='Forecast')
ax.set_title('AirPassengers Forecast', fontsize=22)
ax.set_ylabel('Monthly Passengers', fontsize=20)
ax.set_xlabel('Year', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()

Use cross_validation to forecast multiple historical values.

fcst = NeuralForecast(models=[model], freq='M')
forecasts = fcst.cross_validation(df=AirPassengersPanel, static_df=AirPassengersStatic, n_windows=2, step_size=12)

# Plot predictions
fig, ax = plt.subplots(1, 1, figsize = (20, 7))
Y_hat_df = forecasts.loc['Airline1']
Y_df = AirPassengersPanel[AirPassengersPanel['unique_id']=='Airline1']

plt.plot(Y_df['ds'], Y_df['y'], c='black', label='True')
plt.plot(Y_hat_df['ds'], Y_hat_df['TSMixerx'], c='blue', label='Forecast')
ax.set_title('AirPassengers Forecast', fontsize=22)
ax.set_ylabel('Monthly Passengers', fontsize=20)
ax.set_xlabel('Year', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()

Give us a ⭐ on Github