时间卷积网络

在本笔记本中,我们将通过示例展示如何在 darts 中使用 TCN(时间卷积网络)。

[1]:
# fix python path if working locally
# NOTE(review): the helper comes from the notebook-local `utils` module; presumably it
# makes a local darts checkout importable when the notebook is run from inside the
# repository -- confirm in utils.py.
from utils import fix_pythonpath_if_working_locally

fix_pythonpath_if_working_locally()
[2]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pytorch_lightning.callbacks import TQDMProgressBar

from darts import TimeSeries, concatenate
from darts.utils.callbacks import TFMProgressBar
from darts.models import TCNModel, RNNModel
from darts.dataprocessing.transformers import Scaler
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.metrics import mape, r2_score
from darts.utils.missing_values import fill_missing_values
from darts.datasets import AirPassengersDataset, SunspotsDataset, EnergyDataset

import warnings

# Ignore every Python warning for the rest of the session (presumably to keep the
# rendered notebook output uncluttered).
warnings.filterwarnings("ignore")

import logging

# Disable all logging calls of severity CRITICAL and below, i.e. every standard
# logging level, for all loggers.
logging.disable(logging.CRITICAL)


def generate_torch_kwargs():
    """Build the extra keyword arguments shared by the torch models in this notebook.

    Returns:
        A dict with a single ``"pl_trainer_kwargs"`` entry that selects the CPU
        accelerator and registers a ``TFMProgressBar`` callback configured to show
        a progress bar for the training stage only.
    """
    # Only the training progress bar is kept; bars for the other stages are disabled.
    train_only_bar = TFMProgressBar(enable_train_bar_only=True)
    trainer_settings = {"accelerator": "cpu", "callbacks": [train_only_bar]}
    return {"pl_trainer_kwargs": trainer_settings}

航空乘客数据集

[3]:
# Read data:
ts = AirPassengersDataset().load()

# Create training and validation sets (split after the timestamp 1958-08-01):
train, val = ts.split_after(pd.Timestamp("19580801"))

# Fit the scaler on the training split only, then reuse the fitted scaler for the
# validation split and for the full series (the full scaled series is what the
# backtest further down runs on).
scaler = Scaler()

train_scaled = scaler.fit_transform(train)
val_scaled = scaler.transform(val)
ts_scaled = scaler.transform(ts)

# We'll use the month as a covariate (scaling not required as one-hot-encoded).
# It is built from the full series `ts`, so the same covariate series can be passed
# for both training and validation.
month_series = datetime_attribute_timeseries(ts, attribute="month", one_hot=True)

在下面的单元格中,我们将构建一个 TCNModel。稍后训练时,模型会把内部输入宽度初始化为 13:目标时间序列占 1 维,经独热编码(one_hot=True)的月份协变量占 12 维(不过这一过程对我们来说是透明的)。此外,由于我们稍后想要模拟预测范围为未来 6 个月的历史预测,我们必须确保 TCNModel 实例的 output_chunk_length 参数至少为 6。

如果我们不指定 num_layers 参数,它将被自动设置为能够保证"完整历史覆盖"的最小层数,即输出中的每个条目都依赖于整个输入序列。

[4]:
# Name under which the model's checkpoints are stored; the same name is used further
# down to reload the best checkpoint.
model_name = "TCN_air"
# `num_layers` is deliberately not passed, so it is chosen automatically.
model_air = TCNModel(
    input_chunk_length=13,
    output_chunk_length=12,  # must be >= 6, the horizon of the historical forecast below
    n_epochs=500,
    dropout=0.1,
    dilation_base=2,
    weight_norm=True,
    kernel_size=5,
    num_filters=3,
    random_state=0,  # fixed seed
    save_checkpoints=True,  # checkpoints are needed for `load_from_checkpoint` later
    model_name=model_name,
    # NOTE(review): presumably discards any earlier model/checkpoints stored under
    # the same name -- confirm in the darts docs.
    force_reset=True,
    # CPU accelerator + training-only progress bar (see generate_torch_kwargs).
    **generate_torch_kwargs()
)
[5]:
# Train on the scaled training split, using the scaled validation split for validation.
# The same full-length `month_series` is passed as past covariates for both.
# NOTE(review): presumably darts slices the covariates to the time span each series
# needs -- confirm.
model_air.fit(
    series=train_scaled,
    past_covariates=month_series,
    val_series=val_scaled,
    val_past_covariates=month_series,
)
[5]:
TCNModel(kernel_size=5, num_filters=3, num_layers=None, dilation_base=2, weight_norm=True, dropout=0.1, input_chunk_length=13, output_chunk_length=12, n_epochs=500, random_state=0, save_checkpoints=True, model_name=TCN_air, force_reset=True, pl_trainer_kwargs={'accelerator': 'cpu', 'callbacks': [<darts.utils.callbacks.TFMProgressBar object at 0x2b00e2a40>]})

我们将从验证集上表现最好的检查点加载模型。

[6]:
# Rebind `model_air` to the checkpoint saved under `model_name`; `best=True` asks for
# the best-performing checkpoint rather than the most recent one.
model_air = TCNModel.load_from_checkpoint(model_name=model_name, best=True)

让我们看看预测范围为 6 个月的历史预测(回测)结果:

[7]:
# Backtest the already-trained model on the full scaled series: forecasts with a
# horizon of 6 steps are produced starting at the first timestamp of the validation
# split. `retrain=False` keeps the loaded weights instead of refitting per forecast.
backtest = model_air.historical_forecasts(
    series=ts_scaled,
    past_covariates=month_series,
    start=val_scaled.start_time(),
    forecast_horizon=6,
    retrain=False,
    verbose=True,
)
[8]:
# Overlay the backtest on the full scaled series for a visual comparison.
ts_scaled.plot(label="actual")
backtest.plot(label="backtest (H=6)")
[8]:
<Axes: xlabel='time'>
../_images/examples_05-TCN-examples_12_1.png

月度太阳黑子

让我们尝试在一个更大、更复杂的数据集上使用 TCNModel。

[9]:
# Load the sunspots series.
series_sunspot = SunspotsDataset().load()

# Create training and validation sets (split after the timestamp 1940-10-01).
# Note: this rebinds `train` and `val` from the air-passengers section.
train, val = series_sunspot.split_after(pd.Timestamp("19401001"))

# A fresh scaler (rebinding `scaler`), fitted on the sunspot training split only and
# then applied to the validation split and the full series.
scaler = Scaler()

train_sp_transformed = scaler.fit_transform(train)
val_sp_transformed = scaler.transform(val)
series_sp_transformed = scaler.transform(series_sunspot)
[10]:
# Checkpoint name for the sunspot model (rebinds `model_name`); reused below to
# reload the best checkpoint.
model_name = "TCN_sun"
# `num_layers` is not passed, so it is chosen automatically.
model_sun = TCNModel(
    input_chunk_length=250,
    output_chunk_length=36,  # covers the 12-step horizon used in the backtest below
    n_epochs=100,
    dropout=0,  # no dropout for this model
    dilation_base=2,
    weight_norm=True,
    kernel_size=3,
    num_filters=6,
    # NOTE(review): presumably "evaluate the validation loss every epoch" -- confirm.
    nr_epochs_val_period=1,
    random_state=0,  # fixed seed
    save_checkpoints=True,  # checkpoints are needed for `load_from_checkpoint` later
    model_name=model_name,
    force_reset=True,
    # CPU accelerator + training-only progress bar (see generate_torch_kwargs).
    **generate_torch_kwargs()
)
[11]:
# Train on the scaled training split and validate on the scaled validation split;
# no covariates are used for this dataset.
model_sun.fit(train_sp_transformed, val_series=val_sp_transformed)
[11]:
TCNModel(kernel_size=3, num_filters=6, num_layers=None, dilation_base=2, weight_norm=True, dropout=0, input_chunk_length=250, output_chunk_length=36, n_epochs=100, nr_epochs_val_period=1, random_state=0, save_checkpoints=True, model_name=TCN_sun, force_reset=True, pl_trainer_kwargs={'accelerator': 'cpu', 'callbacks': [<darts.utils.callbacks.TFMProgressBar object at 0x176bcc790>]})
[12]:
# Rebind `model_sun` to the best checkpoint saved under `model_name` ("TCN_sun").
model_sun = TCNModel.load_from_checkpoint(model_name=model_name, best=True)
[13]:
# Backtest without retraining, starting at the first timestamp of the validation
# split. With `last_points_only=False` every forecast is returned in full, and since
# `stride` equals `forecast_horizon` (12) consecutive forecasts start 12 steps apart,
# i.e. they follow one another without overlapping.
backtest_sp = model_sun.historical_forecasts(
    series_sp_transformed,
    start=val_sp_transformed.start_time(),
    forecast_horizon=12,
    stride=12,
    last_points_only=False,
    retrain=False,
    verbose=True,
)
# Join the individual forecasts into a single series for plotting.
backtest_sp = concatenate(backtest_sp)
[14]:
# Compare the concatenated backtest with the scaled validation split.
val_sp_transformed.plot(label="actual")
backtest_sp.plot(label="backtest (H=12)")
plt.legend()
[14]:
<matplotlib.legend.Legend at 0x288b5d720>
../_images/examples_05-TCN-examples_19_1.png

每日能源发电量

[15]:
# Load the energy dataset and convert it to a pandas DataFrame.
df3 = EnergyDataset().load().pd_dataframe()
# Average all rows that share the same day: the index is converted to strings and the
# part before the first space is used as the group key.
# NOTE(review): this assumes the index renders as "<date> <time>" -- confirm.
# `reset_index()` then moves the group key back into a regular column.
df3_day_avg = (
    df3.groupby(df3.index.astype(str).str.split(" ").str[0]).mean().reset_index()
)
# Build a single-column series from the hydro run-of-river generation values, using
# the "time" column as the time index, and fill missing values with the "auto" setting.
series_en = fill_missing_values(
    TimeSeries.from_dataframe(
        df3_day_avg, "time", ["generation hydro run-of-river and poundage"]
    ),
    "auto",
)

# create train and test splits
train_en, val_en = series_en.split_after(pd.Timestamp("20170901"))

# scale the data (scaler fitted on the training split only, then reused)
scaler_en = Scaler()
train_en_transformed = scaler_en.fit_transform(train_en)
val_en_transformed = scaler_en.transform(val_en)
series_en_transformed = scaler_en.transform(series_en)

# add the day as a covariate (scaling not required as one-hot-encoded)
# NOTE(review): `attribute="day"` is presumably the day of the month, as in pandas'
# `DatetimeIndex.day` -- confirm.
day_series = datetime_attribute_timeseries(
    series_en_transformed, attribute="day", one_hot=True
)

# Plot the scaled training and validation splits on one wide figure.
plt.figure(figsize=(10, 3))
train_en_transformed.plot(label="train")
val_en_transformed.plot(label="validation")
[15]:
<Axes: xlabel='time'>
../_images/examples_05-TCN-examples_21_1.png
[16]:
# Checkpoint name for the energy model (rebinds `model_name`); reused below to
# reload the best checkpoint.
model_name = "TCN_energy"
# `num_layers` is not passed, so it is chosen automatically.
model_en = TCNModel(
    input_chunk_length=365,  # 365 steps of the daily-averaged series as input
    output_chunk_length=7,  # matches the 7-step horizon used in the backtest below
    n_epochs=50,
    dropout=0.2,
    dilation_base=2,
    weight_norm=True,
    kernel_size=5,
    num_filters=8,
    # NOTE(review): presumably "evaluate the validation loss every epoch" -- confirm.
    nr_epochs_val_period=1,
    random_state=0,  # fixed seed
    save_checkpoints=True,  # checkpoints are needed for `load_from_checkpoint` later
    model_name=model_name,
    force_reset=True,
    # CPU accelerator + training-only progress bar (see generate_torch_kwargs).
    **generate_torch_kwargs()
)
[17]:
# Train on the scaled training split and validate on the scaled validation split.
# The same full-length `day_series` is passed as past covariates for both.
model_en.fit(
    series=train_en_transformed,
    past_covariates=day_series,
    val_series=val_en_transformed,
    val_past_covariates=day_series,
)
[17]:
TCNModel(kernel_size=5, num_filters=8, num_layers=None, dilation_base=2, weight_norm=True, dropout=0.2, input_chunk_length=365, output_chunk_length=7, n_epochs=50, nr_epochs_val_period=1, random_state=0, save_checkpoints=True, model_name=TCN_energy, force_reset=True, pl_trainer_kwargs={'accelerator': 'cpu', 'callbacks': [<darts.utils.callbacks.TFMProgressBar object at 0x2bc722440>]})
[18]:
# Rebind `model_en` to the best checkpoint saved under `model_name` ("TCN_energy").
model_en = TCNModel.load_from_checkpoint(model_name=model_name, best=True)
[19]:
# Backtest without retraining, starting at the first timestamp of the validation
# split. `stride` equals `forecast_horizon` (7) and `last_points_only=False`, so the
# full 7-step forecasts are returned and follow one another without overlapping.
backtest_en = model_en.historical_forecasts(
    series=series_en_transformed,
    past_covariates=day_series,
    start=val_en_transformed.start_time(),
    forecast_horizon=7,
    stride=7,
    last_points_only=False,
    retrain=False,
    verbose=True,
)
# Join the individual forecasts into a single series for plotting.
backtest_en = concatenate(backtest_en)
[20]:
# Compare the concatenated backtest with the scaled validation split on a larger figure.
plt.figure(figsize=(10, 6))
val_en_transformed.plot(label="actual")
backtest_en.plot(label="backtest (H=7)")
plt.legend()
[20]:
<matplotlib.legend.Legend at 0x2bc3c3970>
../_images/examples_05-TCN-examples_26_1.png
[ ]: