使用 MLflow

将您的 neuralforecast 实验记录到 MLflow

安装依赖项

要安装 NeuralForecast,请参阅 https://nixtlaverse.nixtla.io/neuralforecast/examples/installation.html。

要安装 MLflow,请运行:`pip install mlflow`

导入库

import logging
import os
import warnings

import matplotlib.pyplot as plt
import mlflow
import mlflow.data
import numpy as np
import pandas as pd
from mlflow.client import MlflowClient
from mlflow.data.pandas_dataset import PandasDataset
from utilsforecast.plotting import plot_series

from neuralforecast.core import NeuralForecast
from neuralforecast.models import NBEATSx
from neuralforecast.utils import AirPassengersDF
from neuralforecast.losses.pytorch import MAE
# Set the NIXTLA_ID_AS_COL flag for the Nixtla libraries used below.
os.environ['NIXTLA_ID_AS_COL'] = '1'

# Only let ERROR-level (and above) records through from these noisy loggers.
for _logger_name in ("mlflow", "pytorch_lightning"):
    logging.getLogger(_logger_name).setLevel(logging.ERROR)

# Suppress all Python warnings to keep the notebook output readable.
warnings.filterwarnings("ignore")

划分数据

# Take the AirPassengers panel dataset and split it chronologically:
# everything up to the end of 1959 is used for training, the rest for testing.
Y_df = AirPassengersDF
_split_date = '1959-12-31'
Y_train_df = Y_df.loc[Y_df['ds'] <= _split_date]  # 132 training observations
Y_test_df = Y_df.loc[Y_df['ds'] > _split_date]  # 12 test observations
Y_df.tail()
unique_id ds y
139 1.0 1960-08-31 606.0
140 1.0 1960-09-30 508.0
141 1.0 1960-10-31 461.0
142 1.0 1960-11-30 390.0
143 1.0 1960-12-31 432.0

MLflow UI

在终端中运行 `mlflow ui` 命令以启动 UI,然后访问终端打印的 URL 即可查看并可视化实验。

模型训练

# Turn on MLflow autologging for PyTorch for the duration of the training run.
# NOTE(review): checkpoint=False presumably disables automatic model
# checkpoint logging -- confirm against the installed MLflow version.
mlflow.pytorch.autolog(checkpoint=False)

with mlflow.start_run() as run:
    # Log the dataset to the MLflow run. The "training" context indicates that
    # the dataset is used for model training.
    # NOTE(review): the full Y_df (train + test rows) is logged here, while the
    # model below is fitted on Y_train_df only -- confirm this is intended.
    dataset: PandasDataset = mlflow.data.from_pandas(Y_df, source="AirPassengersDF")
    mlflow.log_input(dataset, context="training")

    # Define and log the model hyperparameters. The forecast horizon equals
    # the number of rows held out for testing.
    horizon = len(Y_test_df)
    model_params = dict(
        input_size=1 * horizon,
        h=horizon,
        max_steps=300,
        loss=MAE(),
        valid_loss=MAE(),
        activation='ReLU',
        scaler_type='robust',
        random_seed=42,
        enable_progress_bar=False,
    )
    mlflow.log_params(model_params)

    # Fit the NBEATSx model, holding out the last `horizon` training rows
    # for validation.
    models = [NBEATSx(**model_params)]
    nf = NeuralForecast(models=models, freq='M')
    # NOTE(review): `train` is not used anywhere else in the visible file.
    train = nf.fit(df=Y_train_df, val_size=horizon)

    # Save the conda environment needed to run the model. The getter only
    # returns the environment, so it has to be logged explicitly to end up
    # in the run's artifacts.
    mlflow.log_dict(
        mlflow.pytorch.get_default_conda_env(),
        "environment/conda.yaml",
    )

    # Save the pip requirements alongside it, one requirement per line.
    mlflow.log_text(
        "\n".join(mlflow.pytorch.get_default_pip_requirements()),
        "environment/requirements.txt",
    )

# Switch autologging off again so later cells are not tracked.
mlflow.pytorch.autolog(disable=True)

# Persist the fitted NeuralForecast object, together with its dataset, under
# the checkpoint directory, overwriting any previous contents.
# NOTE(review): model_index=None presumably selects every model -- confirm
# against the NeuralForecast.save documentation.
nf.save(
    path='./checkpoints/test_run_1/',
    save_dataset=True,
    overwrite=True,
    model_index=None,
)
Seed set to 42

预测未来

# Forecast with the fitted model, passing the held-out test frame as futr_df.
Y_hat_df = nf.predict(
    futr_df=Y_test_df,
)

# Plot the training series together with the forecasts.
plot_series(
    Y_train_df,
    Y_hat_df,
    palette='tab20b',
)

Give us a ⭐ on GitHub