Fix batch-shape compatibility in nearest_neighbor_variational_strategy

LuhuanWu · LuhuanWu · commit 0ae019f68d30 · 2023-07-08T23:27:49.000-07:00
diff --git a/gpytorch/variational/nearest_neighbor_variational_strategy.py b/gpytorch/variational/nearest_neighbor_variational_strategy.py
@@ -138,7 +138,7 @@ def __call__(self, x: Tensor, prior: bool = False, **kwargs: Any) -> Multivariat
         if x is not None:
             assert self.inducing_points.shape[:-2] == x.shape[:-2], (
                 f"x batch shape must matches inducing points batch shape, "
-                f"but got train data batch shape = {x.shape[:-2]}, "
+                f"but got x batch shape = {x.shape[:-2]}, "
                 f"inducing points batch shape = {self.inducing_points.shape[:-2]}."
             )
 
@@ -211,6 +211,7 @@ def forward(
             nn_indices = self.nn_util.find_nn_idx(x.float())
 
             x_batch_shape = x.shape[:-2]
+            batch_shape = torch.broadcast_shapes(self._model_batch_shape, x_batch_shape)
             x_bsz = x.shape[-2]
             assert nn_indices.shape == (*x_batch_shape, x_bsz, self.k), nn_indices.shape
 
@@ -221,7 +222,6 @@ def forward(
             assert inducing_points.shape == (*x_batch_shape, x_bsz, self.k, self.D)
 
             # get variational mean and covar for nearest neighbors
-            batch_shape = torch.broadcast_shapes(self._model_batch_shape, x_batch_shape)
             inducing_values = self._variational_distribution.variational_mean
             expanded_inducing_values = inducing_values.unsqueeze(-1).expand(*batch_shape, self.M, self.k)
             expanded_nn_indices = nn_indices.expand(*batch_shape, x_bsz, self.k)
@@ -239,16 +239,20 @@ def forward(
             # Make everything batch mode
             x = x.unsqueeze(-2)
             assert x.shape == (*x_batch_shape, x_bsz, 1, self.D)
+            x = x.expand(*batch_shape, x_bsz, 1, self.D)
 
             # Compute forward mode in the standard way
-            _x_batch_dims = tuple(range(len(x_batch_shape)))
-            _x = x.permute((-3,) + _x_batch_dims + (-2, -1))
-            _inducing_points = inducing_points.permute((-3,) + _x_batch_dims + (-2, -1))
-            _inducing_values = inducing_values.permute((-2,) + _x_batch_dims + (-1,))
-            _variational_inducing_covar = variational_inducing_covar.permute((-3,) + _x_batch_dims + (-2, -1))
+            _batch_dims = tuple(range(len(batch_shape)))
+            _x = x.permute((-3,) + _batch_dims + (-2, -1))  # (x_bsz, *batch_shape, 1, D)
+
+            # inducing_points.shape (*x_batch_shape, x_bsz, self.k, self.D)
+            inducing_points = inducing_points.expand(*batch_shape, x_bsz, self.k, self.D)
+            _inducing_points = inducing_points.permute((-3,) + _batch_dims + (-2, -1))  # (x_bsz, *batch_shape, k, D)
+            _inducing_values = inducing_values.permute((-2,) + _batch_dims + (-1,))
+            _variational_inducing_covar = variational_inducing_covar.permute((-3,) + _batch_dims + (-2, -1))
             dist = super().forward(_x, _inducing_points, _inducing_values, _variational_inducing_covar, **kwargs)
 
-            _x_batch_dims = tuple(range(1, 1 + len(x_batch_shape)))
+            _x_batch_dims = tuple(range(1, 1 + len(batch_shape)))
             predictive_mean = dist.mean  # (x_bsz, *x_batch_shape, 1)
             predictive_covar = dist.covariance_matrix  # (x_bsz, *x_batch_shape, 1, 1)
             predictive_mean = predictive_mean.permute(_x_batch_dims + (0, -1))
diff --git a/test/variational/test_nearest_neighbor_variational_strategy.py b/test/variational/test_nearest_neighbor_variational_strategy.py
@@ -115,7 +115,7 @@ def _training_iter(
         return output, loss
 
     def _eval_iter(self, model, cuda=False):
-        inducing_batch_shape = model.variational_strategy.inducing_points.shape[:-2]
+        inducing_batch_shape = model.variational_strategy._inducing_batch_shape
         test_x = torch.randn(*inducing_batch_shape, 32, 2).clamp(-2.5, 2.5)
         if cuda:
             test_x = test_x.cuda()