Add constant_constraint to ConstantMean #2082

Merged · 2 commits · Aug 8, 2022
Changes from 1 commit
gpytorch/means/constant_mean.py (94 additions & 12 deletions)
```diff
@@ -1,29 +1,111 @@
 #!/usr/bin/env python3
 
+import warnings
+from typing import Any, Optional
+
 import torch
 
-from ..utils.broadcasting import _mul_broadcast_shape
+from ..constraints import Interval
+from ..priors import Prior
+from ..utils.warnings import OldVersionWarning
 from .mean import Mean
 
 
+def _ensure_updated_strategy_flag_set(
+    state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
+):
+    if prefix + "constant" in state_dict:
+        constant = state_dict.pop(prefix + "constant").squeeze(-1)  # Remove deprecated singleton dimension
+        state_dict[prefix + "raw_constant"] = constant
+        warnings.warn(
+            "You have loaded a GP model with a ConstantMean from a previous version of "
+            "GPyTorch. The mean module parameter `constant` has been renamed to `raw_constant`. "
+            "Additionally, the shape of `raw_constant` is now *batch_shape, whereas the shape of "
+            "`constant` was *batch_shape x 1. "
+            "We have updated the name/shape of the parameter in your state dict, but we recommend that you "
+            "re-save your model.",
+            OldVersionWarning,
+        )
+
+
 class ConstantMean(Mean):
-    def __init__(self, prior=None, batch_shape=torch.Size(), **kwargs):
+    r"""
+    A (non-zero) constant prior mean function, i.e.:
+
+    .. math::
+        \mu(\mathbf x) = C
+
+    where :math:`C` is a learned constant.
+
+    :param constant_prior: Prior for constant parameter :math:`C`.
+    :type constant_prior: ~gpytorch.priors.Prior, optional
+    :param constant_constraint: Constraint for constant parameter :math:`C`.
+    :type constant_constraint: ~gpytorch.priors.Interval, optional
+    :param batch_shape: The batch shape of the learned constant(s) (default: []).
+    :type batch_shape: torch.Size, optional
```
Collaborator commented on lines +41 to +45:

consistency nit

Suggested change:

```diff
-    :type constant_prior: ~gpytorch.priors.Prior, optional
-    :param constant_constraint: Constraint for constant parameter :math:`C`.
-    :type constant_constraint: ~gpytorch.priors.Interval, optional
-    :param batch_shape: The batch shape of the learned constant(s) (default: []).
-    :type batch_shape: torch.Size, optional
+    :type constant_prior: ~gpytorch.priors.Prior, optional.
+    :param constant_constraint: Constraint for constant parameter :math:`C`.
+    :type constant_constraint: ~gpytorch.priors.Interval, optional.
+    :param batch_shape: The batch shape of the learned constant(s) (default: []).
+    :type batch_shape: torch.Size, optional.
```


```diff
+
+    :var torch.Tensor constant: :math:`C` parameter
```
Collaborator commented on the `:var` line above:

Suggested change:

```diff
-    :var torch.Tensor constant: :math:`C` parameter
+    :var torch.Tensor constant: :math:`C` parameter.
```

"""

def __init__(
self,
constant_prior: Optional[Prior] = None,
constant_constraint: Optional[Interval] = None,
batch_shape: torch.Size = torch.Size(),
**kwargs: Any,
):
super(ConstantMean, self).__init__()

# Deprecated kwarg
constant_prior_deprecated = kwargs.get("prior")
if constant_prior_deprecated is not None:
if constant_prior is None: # Using the old kwarg for the constant_prior
warnings.warn(
"The kwarg `prior` for ConstantMean has been renamed to `constant_prior`, and will be deprecated.",
DeprecationWarning,
)
constant_prior = constant_prior_deprecated
else: # Weird edge case where someone set both `prior` and `constant_prior`
warnings.warn(
"You have set both the `constant_prior` and the deprecated `prior` arguments for ConstantMean. "
"`prior` is deprecated, and will be ignored.",
DeprecationWarning,
)
Collaborator commented on lines +68 to +73:

I think this is weird enough that we may just want to throw an error here?
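For context, a minimal sketch of what the reviewer's error-raising alternative could look like. This is hypothetical and not part of the PR (the merged code keeps the `DeprecationWarning`), and the helper name `_resolve_prior_kwargs` is invented for illustration:

```python
from typing import Any, Optional

from gpytorch.priors import Prior


def _resolve_prior_kwargs(constant_prior: Optional[Prior] = None, **kwargs: Any) -> Optional[Prior]:
    """Hypothetical helper: fail fast instead of warning when both kwargs are set."""
    prior_deprecated = kwargs.get("prior")
    if prior_deprecated is not None:
        if constant_prior is not None:
            raise ValueError(
                "Both `constant_prior` and the deprecated `prior` were passed to ConstantMean. "
                "Please pass only `constant_prior`."
            )
        constant_prior = prior_deprecated  # fall back to the deprecated kwarg
    return constant_prior
```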

```diff
+        # Ensure that old versions of the model still load
+        self._register_load_state_dict_pre_hook(_ensure_updated_strategy_flag_set)
+
         self.batch_shape = batch_shape
-        self.register_parameter(name="constant", parameter=torch.nn.Parameter(torch.zeros(*batch_shape, 1)))
-        if prior is not None:
-            self.register_prior("mean_prior", prior, self._constant_param, self._constant_closure)
+        self.register_parameter(name="raw_constant", parameter=torch.nn.Parameter(torch.zeros(batch_shape)))
+        if constant_prior is not None:
+            self.register_prior("mean_prior", constant_prior, self._constant_param, self._constant_closure)
+        if constant_constraint is not None:
+            self.register_constraint("raw_constant", constant_constraint)
 
+    @property
+    def constant(self):
+        return self._constant_param(self)
+
+    @constant.setter
+    def constant(self, value):
+        self._constant_closure(self, value)
+
     # We need a getter of this form so that we can pickle ConstantMean modules with a mean prior, see PR #1992
     def _constant_param(self, m):
-        return m.constant
+        if hasattr(m, "raw_constant_constraint"):
+            return m.raw_constant_constraint.transform(m.raw_constant)
+        return m.raw_constant
 
     # We need a setter of this form so that we can pickle ConstantMean modules with a mean prior, see PR #1992
     def _constant_closure(self, m, value):
         if not torch.is_tensor(value):
-            value = torch.as_tensor(value).to(self.constant)
-        m.initialize(constant=value.reshape(self.constant.shape))
+            value = torch.as_tensor(value).to(m.raw_constant)
+
+        if hasattr(m, "raw_constant_constraint"):
+            m.initialize(raw_constant=m.raw_constant_constraint.inverse_transform(value))
+        else:
+            m.initialize(raw_constant=value)
 
     def forward(self, input):
-        if input.shape[:-2] == self.batch_shape:
-            return self.constant.expand(input.shape[:-1])
-        else:
-            return self.constant.expand(_mul_broadcast_shape(input.shape[:-1], self.constant.shape))
+        constant = self.constant.unsqueeze(-1)  # *batch_shape x 1
+        return constant.expand(torch.broadcast_shapes(constant.shape, input.shape[:-1]))
```
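For readers skimming the diff, a short usage sketch of the new `constant_constraint` argument (illustrative only; it relies on the merged API shown above):

```python
import torch

from gpytorch.constraints import GreaterThan
from gpytorch.means import ConstantMean

# Keep the learned constant strictly positive.
mean_module = ConstantMean(constant_constraint=GreaterThan(0.0))

# `raw_constant` is the unconstrained parameter; the `constant` property
# applies the constraint's transform on the way out.
print(mean_module.raw_constant.shape)  # torch.Size([])
print(mean_module.constant > 0)        # tensor(True)

# Assigning to `constant` routes through the constraint's inverse transform.
mean_module.constant = torch.tensor(2.5)

# forward() broadcasts the constant over the data dimension of the input.
x = torch.randn(10, 5)
print(mean_module(x).shape)  # torch.Size([10])
```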
test/means/test_constant_mean.py (65 additions & 8 deletions)
```diff
@@ -1,20 +1,40 @@
 #!/usr/bin/env python3
 
+import math
 import pickle
 import unittest
+import warnings
+from collections import OrderedDict
 
 import torch
 
+import gpytorch
+from gpytorch.constraints import GreaterThan
 from gpytorch.means import ConstantMean
 from gpytorch.priors import NormalPrior
 from gpytorch.test.base_mean_test_case import BaseMeanTestCase
+from gpytorch.utils.warnings import OldVersionWarning
+
+
+# Test class for loading models that have state dicts with the old ConstantMean parameter names
+class _GPModel(gpytorch.models.ExactGP):
+    def __init__(self, mean_module):
+        train_x = torch.randn(10, 3)
+        train_y = torch.randn(10)
+        likelihood = gpytorch.likelihoods.GaussianLikelihood()
+        super().__init__(train_x, train_y, likelihood)
+        self.mean_module = mean_module
 
 
 class TestConstantMean(BaseMeanTestCase, unittest.TestCase):
     batch_shape = None
 
-    def create_mean(self, prior=None):
-        return ConstantMean(prior=prior, batch_shape=torch.Size([]))
+    def create_mean(self, prior=None, constraint=None):
+        return ConstantMean(
+            constant_prior=prior,
+            constant_constraint=constraint,
+            batch_shape=(self.__class__.batch_shape or torch.Size([])),
+        )
 
     def test_prior(self):
         if self.batch_shape is None:
@@ -28,16 +48,53 @@ def test_prior(self):
         mean._constant_closure(mean, value)
         self.assertTrue(torch.equal(mean.constant.data, value.reshape(mean.constant.data.shape)))
 
+    def test_constraint(self):
+        mean = self.create_mean()
+        self.assertAllClose(mean.constant, torch.zeros(mean.constant.shape))
+
+        constraint = GreaterThan(1.5)
+        mean = self.create_mean(constraint=constraint)
+        self.assertTrue(torch.all(mean.constant >= 1.5))
+        mean.constant = torch.full(self.__class__.batch_shape or torch.Size([]), fill_value=1.65)
+        self.assertAllClose(mean.constant, torch.tensor(1.65).expand(mean.constant.shape))
+
+    def test_loading_old_module(self):
+        batch_shape = self.__class__.batch_shape or torch.Size([])
+        constant = torch.randn(batch_shape)
+        mean = self.create_mean()
+        model = _GPModel(mean)
+
+        old_state_dict = OrderedDict(
+            [
+                ("likelihood.noise_covar.raw_noise", torch.tensor([0.0])),
+                ("likelihood.noise_covar.raw_noise_constraint.lower_bound", torch.tensor(1.0000e-04)),
+                ("likelihood.noise_covar.raw_noise_constraint.upper_bound", torch.tensor(math.inf)),
+                ("mean_module.constant", constant.unsqueeze(-1)),
+            ]
+        )
+        with warnings.catch_warnings(record=True) as ws:
+            warnings.simplefilter("always", OldVersionWarning)
+            model.load_state_dict(old_state_dict)
+            self.assertTrue(any(issubclass(w.category, OldVersionWarning) for w in ws))
+        self.assertEqual(model.mean_module.constant.data, constant)
+
+        new_state_dict = OrderedDict(
+            [
+                ("likelihood.noise_covar.raw_noise", torch.tensor([0.0])),
+                ("likelihood.noise_covar.raw_noise_constraint.lower_bound", torch.tensor(1.0000e-04)),
+                ("likelihood.noise_covar.raw_noise_constraint.upper_bound", torch.tensor(math.inf)),
+                ("mean_module.raw_constant", constant),
+            ]
+        )
+        with warnings.catch_warnings(record=True) as ws:
+            warnings.simplefilter("always", OldVersionWarning)
+            model.load_state_dict(new_state_dict)
+            self.assertFalse(any(issubclass(w.category, OldVersionWarning) for w in ws))
+
 
 class TestConstantMeanBatch(TestConstantMean, unittest.TestCase):
     batch_shape = torch.Size([3])
 
-    def create_mean(self, prior=None):
-        return ConstantMean(prior=prior, batch_shape=self.__class__.batch_shape)
-
 
 class TestConstantMeanMultiBatch(TestConstantMean, unittest.TestCase):
     batch_shape = torch.Size([2, 3])
-
-    def create_mean(self, prior=None):
-        return ConstantMean(prior=prior, batch_shape=self.__class__.batch_shape)
```
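For completeness, a sketch of the batched behavior these tests exercise. Note that `raw_constant` now has shape `*batch_shape` rather than `*batch_shape x 1` (illustrative, not part of the PR):

```python
import torch

from gpytorch.means import ConstantMean

# One learned constant per batch element; no trailing singleton dimension anymore.
mean_module = ConstantMean(batch_shape=torch.Size([2, 3]))
print(mean_module.raw_constant.shape)  # torch.Size([2, 3])

# forward() unsqueezes to *batch_shape x 1 and broadcasts against input.shape[:-1].
x = torch.randn(2, 3, 10, 5)
print(mean_module(x).shape)  # torch.Size([2, 3, 10])
```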