# -*- coding: utf-8 -*-
"""
Soft-DTW loss for PyTorch neural network
========================================

The aim here is to use the Soft Dynamic Time Warping metric as a loss function
of a PyTorch neural network for time series forecasting.

The `torch`-compatible implementation of the soft-DTW loss function is
available from the :mod:`tslearn.metrics` module.
"""

# Authors: Yann Cabanes, Romain Tavenard
# License: BSD 3 clause
# sphinx_gallery_thumbnail_number = 2

"""Import the modules"""

import numpy as np
import matplotlib.pyplot as plt
from tslearn.datasets import CachedDatasets
from tslearn.metrics import SoftDTWLossPyTorch
import torch
from torch import nn

##############################################################################
# Load the dataset
# ----------------
#
# Using the CachedDatasets utility from tslearn, we load the "Trace" time
# series dataset. Both the training and testing arrays are of shape
# (100, 275, 1). We then build a new dataset X_subset made of 50 random time
# series from classes indexed 1 to 3 (y_train < 4) in the training set, so
# that X_subset is of shape (50, 275, 1).

data_loader = CachedDatasets()
X_train, y_train, X_test, y_test = data_loader.load_dataset("Trace")

X_subset = X_train[y_train < 4]
np.random.shuffle(X_subset)
X_subset = X_subset[:50]
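
# Quick sanity check of the shapes announced above (these prints are not part
# of the original example).
print("X_train shape:", X_train.shape)    # expected: (100, 275, 1)
print("X_subset shape:", X_subset.shape)  # expected: (50, 275, 1)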

##############################################################################
# Multi-step ahead forecasting
# ----------------------------
#
# In this section, our goal is to implement a single-hidden-layer perceptron
# for time series forecasting, trained to minimize the soft-DTW metric.
# We rely on the `torch`-compatible implementation of the soft-DTW loss
# function provided by tslearn. The code below implements a generic
# multi-layer perceptron class in torch, on which we then rely to build a
# forecasting MLP trained with a soft-DTW loss.

# Note that soft-DTW can take negative values because of the regularization
# parameter gamma. The normalized soft-DTW (also called the soft-DTW
# divergence) between two time series x and y is defined as:
#     Soft-DTW(x, y) - (Soft-DTW(x, x) + Soft-DTW(y, y)) / 2
# The normalized soft-DTW is always non-negative. However, it requires three
# soft-DTW evaluations, so it is roughly three times as expensive to compute
# as the plain soft-DTW. A sketch of this divergence is given just below.

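# As an illustration of the definition above, here is a small sketch (not part
# of the original example; the function name is ours) of the soft-DTW
# divergence, built from three calls to SoftDTWLossPyTorch on batched tensors
# of shape (n_series, n_timesteps, 1).
def soft_dtw_divergence(x, y, gamma=0.1):
    sdtw = SoftDTWLossPyTorch(gamma=gamma)
    return sdtw(x, y) - 0.5 * (sdtw(x, x) + sdtw(y, y))

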
class MultiLayerPerceptron(torch.nn.Module):
    def __init__(self, layers, loss=None):
        # At init, we register the layers and set up the loss and optimizer
        super(MultiLayerPerceptron, self).__init__()
        self.layers = layers
        if loss is None:
            self.loss = torch.nn.MSELoss(reduction="none")
        else:
            self.loss = loss
        self.optimizer = torch.optim.SGD(self.parameters(), lr=0.001)

    def forward(self, X):
        # The forward method defines the forward pass: how the outputs of the
        # network are computed from the input and the parameters of the
        # layers registered at init
        if not isinstance(X, torch.Tensor):
            X = torch.Tensor(X)
        batch_size = X.size(0)
        # Flatten each time series into a vector before the linear layers
        X_reshaped = torch.reshape(X, (batch_size, -1))
        output = self.layers(X_reshaped)
        # Reshape the outputs back to the (batch, timesteps, 1) series format
        return torch.reshape(output, (batch_size, -1, 1))

    def fit(self, X, y, max_epochs=10):
        # The fit method performs the actual optimization
        X_torch = torch.Tensor(X)
        y_torch = torch.Tensor(y)

        for e in range(max_epochs):
            self.optimizer.zero_grad()
            # Forward pass
            y_pred = self.forward(X_torch)
            # Compute the loss, averaged over the batch
            loss = self.loss(y_pred, y_torch).mean()
            # Backward pass and parameter update
            loss.backward()
            self.optimizer.step()


##############################################################################
# Using MSE as a loss function
# ----------------------------
#
# We first instantiate the MLP class defined above to train a
# single-hidden-layer model using mean squared error (MSE) as the criterion
# to be optimized. We train the network for 1000 epochs on a forecasting
# task that consists in predicting the last 125 elements of a time series
# given its first 150 elements.

model = MultiLayerPerceptron(
    layers=nn.Sequential(
        nn.Linear(in_features=150, out_features=256),
        nn.ReLU(),
        nn.Linear(in_features=256, out_features=125)
    )
)

# Inputs are the first 150 time steps; targets are the remaining 125 ones
model.fit(X_subset[:, :150], X_subset[:, 150:], max_epochs=1000)

ts_index = 50
y_pred = model(X_test[:, :150, 0]).detach().numpy()
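
# As a quick sanity check (not part of the original example), we compute the
# average squared forecasting error over the whole test set on the 125
# predicted time steps.
mse_test = np.mean((y_pred - X_test[:, 150:]) ** 2)
print("Test MSE over the forecast horizon: {:.4f}".format(mse_test))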

plt.figure()
plt.title('Multi-step ahead forecasting using MSE')
plt.plot(X_test[ts_index].ravel())
plt.plot(np.arange(150, 275), y_pred[ts_index], 'r-')


##############################################################################
# Using Soft-DTW as a loss function
# ---------------------------------
#
# Taking inspiration from the code above, we now train the same
# single-hidden-layer model using soft-DTW as the criterion to be optimized,
# simply by passing a SoftDTWLossPyTorch instance to the MLP constructor.
# We train the network for 100 epochs on the same forecasting task:
# predicting the last 125 elements of a time series given its first 150
# elements.

model = MultiLayerPerceptron(
    layers=nn.Sequential(
        nn.Linear(in_features=150, out_features=256),
        nn.ReLU(),
        nn.Linear(in_features=256, out_features=125)
    ),
    loss=SoftDTWLossPyTorch(gamma=0.1)
)

model.fit(X_subset[:, :150], X_subset[:, 150:], max_epochs=100)

y_pred = model(X_test[:, :150, 0]).detach().numpy()
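
# As an illustrative check (not part of the original example), we evaluate the
# trained network with the same soft-DTW criterion on the test set. As in the
# fit method above, the loss returns one value per pair of series, so we
# average over the batch.
with torch.no_grad():
    sdtw_loss = SoftDTWLossPyTorch(gamma=0.1)
    test_loss = sdtw_loss(torch.Tensor(y_pred), torch.Tensor(X_test[:, 150:]))
print("Mean soft-DTW between forecasts and ground truth: "
      "{:.4f}".format(test_loss.mean().item()))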

plt.figure()
plt.title('Multi-step ahead forecasting using Soft-DTW loss')
plt.plot(X_test[ts_index].ravel())
plt.plot(np.arange(150, 275), y_pred[ts_index], 'r-')

# Display both figures when the script is run as a standalone program
plt.show()