diff --git a/docs/source/conf.py b/docs/source/conf.py index 34f2818de..22070049a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -80,6 +80,7 @@ def find_version(*file_paths): extensions = [ "sphinx.ext.coverage", "sphinx.ext.mathjax", + 'sphinx.ext.napoleon', "sphinx.ext.viewcode", "sphinx.ext.githubpages", "sphinx.ext.autodoc", diff --git a/gpytorch/distributions/multitask_multivariate_normal.py b/gpytorch/distributions/multitask_multivariate_normal.py index 6da841a7f..7c8637979 100644 --- a/gpytorch/distributions/multitask_multivariate_normal.py +++ b/gpytorch/distributions/multitask_multivariate_normal.py @@ -119,7 +119,7 @@ def from_batch_mvn(cls, batch_mvn, task_dim=-1): def from_independent_mvns(cls, mvns): """ Convert an iterable of MVNs into a :obj:`~gpytorch.distributions.MultitaskMultivariateNormal`. - The resulting distribution will have :attr:`len(mvns)` tasks, and the tasks will be independent. + The resulting distribution will have ``len(mvns)`` tasks, and the tasks will be independent. :param ~gpytorch.distributions.MultitaskNormal mvn: The base MVN distributions. :returns: the independent multitask distribution @@ -247,7 +247,7 @@ def to_data_independent_dist(self): """ Convert a multitask MVN into a batched (non-multitask) MVNs The result retains the intertask covariances, but gets rid of the inter-data covariances. - The resulting distribution will have :attr:`len(mvns)` tasks, and the tasks will be independent. + The resulting distribution will have ``len(mvns)`` tasks, and the tasks will be independent. :returns: the bached data-independent MVN :rtype: gpytorch.distributions.MultivariateNormal diff --git a/gpytorch/functions/__init__.py b/gpytorch/functions/__init__.py index 5f40f591f..9084295ef 100644 --- a/gpytorch/functions/__init__.py +++ b/gpytorch/functions/__init__.py @@ -13,9 +13,9 @@ def add_diag(input, diag): Adds a diagonal matrix s*I to the input matrix input. Args: - :attr:`input` (Tensor (nxn) or (bxnxn)): + input (Tensor (nxn) or (bxnxn)): Tensor or LazyTensor wrapping matrix to add diagonal component to. - :attr:`diag` (scalar or Tensor (n) or Tensor (bxn) or Tensor (bx1)): + diag (scalar or Tensor (n) or Tensor (bxn) or Tensor (bx1)): Diagonal component to add to tensor Returns: @@ -89,7 +89,7 @@ def matmul(mat, rhs): def inv_matmul(mat, right_tensor, left_tensor=None): r""" - Computes a linear solve (w.r.t :attr:`mat` = :math:`A`) with several right hand sides :math:`R`. + Computes a linear solve (w.r.t mat = :math:`A`) with several right hand sides :math:`R`. I.e. computes ... math:: @@ -98,9 +98,9 @@ def inv_matmul(mat, right_tensor, left_tensor=None): A^{-1} R, \end{equation} - where :math:`R` is :attr:`right_tensor` and :math:`A` is :attr:`mat`. + where :math:`R` is right_tensor and :math:`A` is mat. - If :attr:`left_tensor` is supplied, computes + If left_tensor is supplied, computes ... math:: @@ -108,7 +108,7 @@ def inv_matmul(mat, right_tensor, left_tensor=None): L A^{-1} R, \end{equation} - where :math:`L` is :attr:`left_tensor`. Supplying this can reduce the number of + where :math:`L` is left_tensor. Supplying this can reduce the number of CG calls required. Args: @@ -181,7 +181,7 @@ def pivoted_cholesky(mat, rank, error_tol=None, return_pivots=None): :type mat: ~gpytorch.lazy.LazyTensor or ~torch.Tensor :param int rank: The size of the partial pivoted Cholesky factor. :param error_tol: Defines an optional stopping criterion. 
- If the residual of the factorization is less than :attr:`error_tol`, then the + If the residual of the factorization is less than error_tol, then the factorization will exit early. This will result in a :math:`\leq \text{ rank}` factor. :type error_tol: float, optional :param bool return_pivots: (default: False) Whether or not to return the pivots alongside diff --git a/gpytorch/functions/_diagonalization.py b/gpytorch/functions/_diagonalization.py index 81e60d08c..8f669efb6 100644 --- a/gpytorch/functions/_diagonalization.py +++ b/gpytorch/functions/_diagonalization.py @@ -14,7 +14,7 @@ def forward(ctx, representation_tree, device, dtype, matrix_shape, max_iter, bat :param list matrix_args: The arguments representing the symmetric matrix A (or batch of PSD matrices A) :rtype: (torch.Tensor, torch.Tensor) - :return: :attr:`Q`, :attr: `S` such that :math:`Q S Q^T \approx A` + :return: Q, S such that :math:`Q S Q^T \approx A` """ ctx.representation_tree = representation_tree diff --git a/gpytorch/functions/_root_decomposition.py b/gpytorch/functions/_root_decomposition.py index e906b5f2d..dd54df7dd 100644 --- a/gpytorch/functions/_root_decomposition.py +++ b/gpytorch/functions/_root_decomposition.py @@ -26,7 +26,7 @@ def forward( :param list matrix_args: The arguments representing the symmetric matrix A (or batch of PSD matrices A) :rtype: (torch.Tensor, torch.Tensor) - :return: :attr:`R`, such that :math:`R R^T \approx A`, and :attr:`R_inv`, such that + :return: R, such that :math:`R R^T \approx A`, and R_inv, such that :math:`R_{inv} R_{inv}^T \approx A^{-1}` (will only be populated if self.inverse = True) """ from ..lazy import lazify diff --git a/gpytorch/kernels/additive_structure_kernel.py b/gpytorch/kernels/additive_structure_kernel.py index 8565d95b4..1c2ba9c6e 100644 --- a/gpytorch/kernels/additive_structure_kernel.py +++ b/gpytorch/kernels/additive_structure_kernel.py @@ -26,11 +26,11 @@ class AdditiveStructureKernel(Kernel): of the additive terms in batch, making it very fast. Args: - :attr:`base_kernel` (Kernel): + base_kernel (Kernel): The kernel to approximate with KISS-GP - :attr:`num_dims` (int): + num_dims (int): The dimension of the input data. - :attr:`active_dims` (tuple of ints, optional): + active_dims (tuple of ints, optional): Passed down to the `base_kernel`. """ diff --git a/gpytorch/kernels/cosine_kernel.py b/gpytorch/kernels/cosine_kernel.py index 76261cc89..11add6f2f 100644 --- a/gpytorch/kernels/cosine_kernel.py +++ b/gpytorch/kernels/cosine_kernel.py @@ -25,22 +25,22 @@ class CosineKernel(Kernel): where :math:`p` is the period length parameter. Args: - :attr:`batch_shape` (torch.Size, optional): + batch_shape (torch.Size, optional): Set this if you want a separate lengthscale for each - batch of input data. It should be `b` if :attr:`x1` is a `b x n x d` tensor. Default: `torch.Size([])` - :attr:`active_dims` (tuple of ints, optional): + batch of input data. It should be `b` if x1 is a `b x n x d` tensor. Default: `torch.Size([])` + active_dims (tuple of ints, optional): Set this if you want to compute the covariance of only a few input dimensions. The ints corresponds to the indices of the dimensions. Default: `None`. - :attr:`period_length_prior` (Prior, optional): + period_length_prior (Prior, optional): Set this if you want to apply a prior to the period length parameter. 
Default: `None` - :attr:`period_length_constraint` (Constraint, optional): + period_length_constraint (Constraint, optional): Set this if you want to apply a constraint to the period length parameter. Default: `Positive`. - :attr:`eps` (float): + eps (float): The minimum value that the lengthscale/period length can take (prevents divide by zero errors). Default: `1e-6`. Attributes: - :attr:`period_length` (Tensor): + period_length (Tensor): The period length parameter. Size = `*batch_shape x 1 x 1`. Example: diff --git a/gpytorch/kernels/cylindrical_kernel.py b/gpytorch/kernels/cylindrical_kernel.py index 86251f943..48f24958c 100644 --- a/gpytorch/kernels/cylindrical_kernel.py +++ b/gpytorch/kernels/cylindrical_kernel.py @@ -21,19 +21,19 @@ class CylindricalKernel(Kernel): The data must lie completely within the unit ball. Args: - :attr:`num_angular_weights` (int): + num_angular_weights (int): The number of components in the angular kernel - :attr:`radial_base_kernel` (gpytorch.kernel): + radial_base_kernel (gpytorch.kernel): The base kernel for computing the radial kernel - :attr:`batch_size` (int, optional): + batch_size (int, optional): Set this if the data is batch of input data. - It should be `b` if :attr:`x1` is a `b x n x d` tensor. Default: `1` - :attr:`eps` (float): + It should be `b` if x1 is a `b x n x d` tensor. Default: `1` + eps (float): Small floating point number used to improve numerical stability in kernel computations. Default: `1e-6` - :attr:`param_transform` (function, optional): + param_transform (function, optional): Set this if you want to use something other than softplus to ensure positiveness of parameters. - :attr:`inv_param_transform` (function, optional): + inv_param_transform (function, optional): Set this to allow setting parameters directly in transformed space and sampling from priors. Automatically inferred for common transformations such as torch.exp or torch.nn.functional.softplus. """ diff --git a/gpytorch/kernels/distributional_input_kernel.py b/gpytorch/kernels/distributional_input_kernel.py index e01b3956c..082fbc917 100644 --- a/gpytorch/kernels/distributional_input_kernel.py +++ b/gpytorch/kernels/distributional_input_kernel.py @@ -20,7 +20,7 @@ class DistributionalInputKernel(Kernel): where :math:`a` is the lengthscale. Args: - :attr:`distance_function` (function) distance function between distributional inputs. + distance_function (function) distance function between distributional inputs. """ has_lengthscale = True diff --git a/gpytorch/kernels/gaussian_symmetrized_kl_kernel.py b/gpytorch/kernels/gaussian_symmetrized_kl_kernel.py index 11cec5948..2c5bded31 100644 --- a/gpytorch/kernels/gaussian_symmetrized_kl_kernel.py +++ b/gpytorch/kernels/gaussian_symmetrized_kl_kernel.py @@ -10,11 +10,11 @@ def _symmetrized_kl(dist1, dist2, eps=1e-8): the first half of the distribution tensors are the mean, and the second half are the log variances. Args: - :attr:`dist1` (torch.Tensor) has shapes batch x n x dimensions. The first half + dist1 (torch.Tensor) has shapes batch x n x dimensions. The first half of the last dimensions are the means, while the second half are the log-variances. - :attr:`dist2` (torch.Tensor) has shapes batch x n x dimensions. The first half + dist2 (torch.Tensor) has shapes batch x n x dimensions. The first half of the last dimensions are the means, while the second half are the log-variances. 
- :attr:`eps` (float) jitter term for the noise variance + eps (float) jitter term for the noise variance """ num_dims = int(dist1.shape[-1] / 2) diff --git a/gpytorch/kernels/grid_interpolation_kernel.py b/gpytorch/kernels/grid_interpolation_kernel.py index 4c2c22f4e..527e2f57f 100644 --- a/gpytorch/kernels/grid_interpolation_kernel.py +++ b/gpytorch/kernels/grid_interpolation_kernel.py @@ -37,12 +37,12 @@ class GridInterpolationKernel(GridKernel): * :math:`\mathbf{w_{x_1}}` and :math:`\mathbf{w_{x_2}}` are sparse vectors based on :math:`\mathbf{x_1}` and :math:`\mathbf{x_2}` that apply cubic interpolation. - The user should supply the size of the grid (using the :attr:`grid_size` attribute). + The user should supply the size of the grid (using the grid_size attribute). To choose a reasonable grid value, we highly recommend using the :func:`gpytorch.utils.grid.choose_grid_size` helper function. The bounds of the grid will automatically be determined by data. - (Alternatively, you can hard-code bounds using the :attr:`grid_bounds`, which + (Alternatively, you can hard-code bounds using the grid_bounds, which will speed up this kernel's computations.) .. note:: @@ -51,18 +51,18 @@ class GridInterpolationKernel(GridKernel): Periodic, Spectral Mixture, etc.) Args: - - :attr:`base_kernel` (Kernel): + base_kernel (Kernel): The kernel to approximate with KISS-GP - - :attr:`grid_size` (Union[int, List[int]]): + grid_size (Union[int, List[int]]): The size of the grid in each dimension. If a single int is provided, then every dimension will have the same grid size. - - :attr:`num_dims` (int): + num_dims (int): The dimension of the input data. Required if `grid_bounds=None` - - :attr:`grid_bounds` (tuple(float, float), optional): + grid_bounds (tuple(float, float), optional): The bounds of the grid, if known (high performance mode). The length of the tuple must match the number of dimensions. The entries represent the min/max values for each dimension. - - :attr:`active_dims` (tuple of ints, optional): + active_dims (tuple of ints, optional): Passed down to the `base_kernel`. .. _Kernel Interpolation for Scalable Structured Gaussian Processes: diff --git a/gpytorch/kernels/grid_kernel.py b/gpytorch/kernels/grid_kernel.py index 41ac23c0d..fabc0ab05 100644 --- a/gpytorch/kernels/grid_kernel.py +++ b/gpytorch/kernels/grid_kernel.py @@ -25,14 +25,14 @@ class GridKernel(Kernel): Periodic, Spectral Mixture, etc.) Args: - :attr:`base_kernel` (Kernel): + base_kernel (Kernel): The kernel to speed up with grid methods. - :attr:`grid` (Tensor): + grid (Tensor): A g x d tensor where column i consists of the projections of the grid in dimension i. - :attr:`active_dims` (tuple of ints, optional): + active_dims (tuple of ints, optional): Passed down to the `base_kernel`. - :attr:`interpolation_mode` (bool): + interpolation_mode (bool): Used for GridInterpolationKernel where we want the covariance between points in the projections of the grid of each dimension. We do this by treating `grid` as d batches of g x 1 tensors by diff --git a/gpytorch/kernels/index_kernel.py b/gpytorch/kernels/index_kernel.py index 76e0bc05b..c455f0c95 100644 --- a/gpytorch/kernels/index_kernel.py +++ b/gpytorch/kernels/index_kernel.py @@ -25,18 +25,18 @@ class IndexKernel(Kernel): These parameters are learned. Args: - :attr:`num_tasks` (int): + num_tasks (int): Total number of indices. 
- :attr:`batch_shape` (torch.Size, optional): + batch_shape (torch.Size, optional): Set if the MultitaskKernel is operating on batches of data (and you want different parameters for each batch) - :attr:`rank` (int): + rank (int): Rank of :math:`B` matrix. Controls the degree of correlation between the outputs. With a rank of 1 the outputs are identical except for a scaling factor. - :attr:`prior` (:obj:`gpytorch.priors.Prior`): + prior (:obj:`gpytorch.priors.Prior`): Prior for :math:`B` matrix. - :attr:`var_constraint` (Constraint, optional): + var_constraint (Constraint, optional): Constraint for added diagonal component. Default: `Positive`. Attributes: diff --git a/gpytorch/kernels/kernel.py b/gpytorch/kernels/kernel.py index ac7a90b00..ecd7283a5 100644 --- a/gpytorch/kernels/kernel.py +++ b/gpytorch/kernels/kernel.py @@ -97,30 +97,30 @@ class Kernel(Module): .. note:: - The :attr:`lengthscale` parameter is parameterized on a log scale to constrain it to be positive. - You can set a prior on this parameter using the :attr:`lengthscale_prior` argument. + The lengthscale parameter is parameterized on a log scale to constrain it to be positive. + You can set a prior on this parameter using the lengthscale_prior argument. - Base Args: - :attr:`ard_num_dims` (int, optional): + Args: + ard_num_dims (int, optional): Set this if you want a separate lengthscale for each input - dimension. It should be `d` if :attr:`x1` is a `n x d` matrix. Default: `None` - :attr:`batch_shape` (torch.Size, optional): + dimension. It should be `d` if x1 is a `n x d` matrix. Default: `None` + batch_shape (torch.Size, optional): Set this if you want a separate lengthscale for each batch of input - data. It should be `b1 x ... x bk` if :attr:`x1` is a `b1 x ... x bk x n x d` tensor. - :attr:`active_dims` (tuple of ints, optional): + data. It should be `b1 x ... x bk` if x1 is a `b1 x ... x bk x n x d` tensor. + active_dims (tuple of ints, optional): Set this if you want to compute the covariance of only a few input dimensions. The ints corresponds to the indices of the dimensions. Default: `None`. - :attr:`lengthscale_prior` (Prior, optional): + lengthscale_prior (Prior, optional): Set this if you want to apply a prior to the lengthscale parameter. Default: `None` - :attr:`lengthscale_constraint` (Constraint, optional): + lengthscale_constraint (Constraint, optional): Set this if you want to apply a constraint to the lengthscale parameter. Default: `Positive`. - :attr:`eps` (float): + eps (float): The minimum value that the lengthscale can take (prevents divide by zero errors). Default: `1e-6`. - Base Attributes: - :attr:`lengthscale` (Tensor): + Attributes: + lengthscale (Tensor): The lengthscale parameter. Size/shape of parameter depends on the - :attr:`ard_num_dims` and :attr:`batch_shape` arguments. + ard_num_dims and batch_shape arguments. Example: >>> covar_module = gpytorch.kernels.LinearKernel() @@ -188,13 +188,13 @@ def forward(self, x1, x2, diag=False, last_dim_is_batch=False, **params): This method should be imlemented by all Kernel subclasses. Args: - :attr:`x1` (Tensor `n x d` or `b x n x d`): + x1 (Tensor `n x d` or `b x n x d`): First set of data - :attr:`x2` (Tensor `m x d` or `b x m x d`): + x2 (Tensor `m x d` or `b x m x d`): Second set of data - :attr:`diag` (bool): + diag (bool): Should the Kernel compute the whole kernel, or just the diag? 
- :attr:`last_dim_is_batch` (tuple, optional): + last_dim_is_batch (tuple, optional): If this is true, it treats the last dimension of the data as another batch dimension. (Useful for additive structure over the dimensions). Default: False @@ -284,15 +284,15 @@ def covar_dist( all pairs of points in x1 and x2. Args: - :attr:`x1` (Tensor `n x d` or `b1 x ... x bk x n x d`): + x1 (Tensor `n x d` or `b1 x ... x bk x n x d`): First set of data. - :attr:`x2` (Tensor `m x d` or `b1 x ... x bk x m x d`): + x2 (Tensor `m x d` or `b1 x ... x bk x m x d`): Second set of data. - :attr:`diag` (bool): + diag (bool): Should we return the whole distance matrix, or just the diagonal? If True, we must have `x1 == x2`. - :attr:`last_dim_is_batch` (tuple, optional): + last_dim_is_batch (tuple, optional): Is the last dimension of the data a batch dimension or not? - :attr:`square_dist` (bool): + square_dist (bool): Should we square the distance matrix before returning? Returns: diff --git a/gpytorch/kernels/linear_kernel.py b/gpytorch/kernels/linear_kernel.py index 5c4fc5a9a..27cc33958 100644 --- a/gpytorch/kernels/linear_kernel.py +++ b/gpytorch/kernels/linear_kernel.py @@ -24,7 +24,7 @@ class LinearKernel(Kernel): where - * :math:`v` is a :attr:`variance` parameter. + * :math:`v` is a variance parameter. .. note:: @@ -37,11 +37,11 @@ class LinearKernel(Kernel): :math:`O(nd)` time and space. Args: - :attr:`variance_prior` (:class:`gpytorch.priors.Prior`): + variance_prior (:class:`gpytorch.priors.Prior`): Prior over the variance parameter (default `None`). - :attr:`variance_constraint` (Constraint, optional): + variance_constraint (Constraint, optional): Constraint to place on variance parameter. Default: `Positive`. - :attr:`active_dims` (list): + active_dims (list): List of data dimensions to operate on. `len(active_dims)` should equal `num_dimensions`. """ diff --git a/gpytorch/kernels/matern_kernel.py b/gpytorch/kernels/matern_kernel.py index 04503316b..0173ced30 100644 --- a/gpytorch/kernels/matern_kernel.py +++ b/gpytorch/kernels/matern_kernel.py @@ -26,7 +26,7 @@ class MaternKernel(Kernel): * :math:`d = (\mathbf{x_1} - \mathbf{x_2})^\top \Theta^{-2} (\mathbf{x_1} - \mathbf{x_2})` is the distance between - :math:`x_1` and :math:`x_2` scaled by the :attr:`lengthscale` parameter :math:`\Theta`. + :math:`x_1` and :math:`x_2` scaled by the lengthscale parameter :math:`\Theta`. * :math:`\nu` is a smoothness parameter (takes values 1/2, 3/2, or 5/2). Smaller values are less smooth. * :math:`K_\nu` is a modified Bessel function. @@ -41,7 +41,7 @@ class MaternKernel(Kernel): :param nu: (Default: 2.5) The smoothness parameter. :type nu: float (0.5, 1.5, or 2.5) :param ard_num_dims: (Default: `None`) Set this if you want a separate lengthscale for each - input dimension. It should be `d` if :attr:`x1` is a `... x n x d` matrix. + input dimension. It should be `d` if x1 is a `... x n x d` matrix. :type ard_num_dims: int, optional :param batch_shape: (Default: `None`) Set this if you want a separate lengthscale for each batch of input data. It should be `torch.Size([b1, b2])` for a `b1 x b2 x n x m` kernel output. @@ -59,9 +59,6 @@ class MaternKernel(Kernel): :param eps: (Default: 1e-6) The minimum value that the lengthscale can take (prevents divide by zero errors). :type eps: float, optional - :var torch.Tensor lengthscale: The lengthscale parameter. Size/shape of parameter depends on the - :attr:`ard_num_dims` and :attr:`batch_shape` arguments. 
- Example: >>> x = torch.randn(10, 5) >>> # Non-batch: Simple option diff --git a/gpytorch/kernels/multi_device_kernel.py b/gpytorch/kernels/multi_device_kernel.py index 73c32c69f..7e5acccee 100644 --- a/gpytorch/kernels/multi_device_kernel.py +++ b/gpytorch/kernels/multi_device_kernel.py @@ -15,9 +15,9 @@ class MultiDeviceKernel(DataParallel, Kernel): Allocates the covariance matrix on distributed devices, e.g. multiple GPUs. Args: - - :attr:`base_kernel`: Base kernel to distribute - - :attr:`device_ids`: list of `torch.device` objects to place kernel chunks on - - :attr:`output_device`: Device where outputs will be placed + base_kernel: Base kernel to distribute + device_ids: list of `torch.device` objects to place kernel chunks on + output_device: Device where outputs will be placed """ def __init__( diff --git a/gpytorch/kernels/periodic_kernel.py b/gpytorch/kernels/periodic_kernel.py index df08f00db..1232b96ae 100644 --- a/gpytorch/kernels/periodic_kernel.py +++ b/gpytorch/kernels/periodic_kernel.py @@ -38,7 +38,7 @@ class PeriodicKernel(Kernel): decorate this kernel with a :class:`gpytorch.kernels.ScaleKernel`. :param ard_num_dims: (Default: `None`) Set this if you want a separate lengthscale for each - input dimension. It should be `d` if :attr:`x1` is a `... x n x d` matrix. + input dimension. It should be `d` if x1 is a `... x n x d` matrix. :type ard_num_dims: int, optional :param batch_shape: (Default: `None`) Set this if you want a separate lengthscale for each batch of input data. It should be `torch.Size([b1, b2])` for a `b1 x b2 x n x m` kernel output. @@ -62,10 +62,8 @@ class PeriodicKernel(Kernel): :param eps: (Default: 1e-6) The minimum value that the lengthscale can take (prevents divide by zero errors). :type eps: float, optional - :var torch.Tensor lengthscale: The lengthscale parameter. Size/shape of parameter depends on the - :attr:`ard_num_dims` and :attr:`batch_shape` arguments. :var torch.Tensor period_length: The period length parameter. Size/shape of parameter depends on the - :attr:`ard_num_dims` and :attr:`batch_shape` arguments. + ard_num_dims and batch_shape arguments. Example: >>> x = torch.randn(10, 5) diff --git a/gpytorch/kernels/piecewise_polynomial_kernel.py b/gpytorch/kernels/piecewise_polynomial_kernel.py index 97437b2e7..26abf0951 100644 --- a/gpytorch/kernels/piecewise_polynomial_kernel.py +++ b/gpytorch/kernels/piecewise_polynomial_kernel.py @@ -33,7 +33,7 @@ class PiecewisePolynomialKernel(Kernel): :param int q: (default= 2) The smoothness parameter. :type q: int (0, 1, 2 or 3) :param ard_num_dims: (Default: `None`) Set this if you want a separate lengthscale for each - input dimension. It should be `d` if :attr:`x1` is a `... x n x d` matrix. + input dimension. It should be `d` if x1 is a `... x n x d` matrix. :type ard_num_dims: int, optional :param batch_shape: (Default: `None`) Set this if you want a separate lengthscale for each batch of input data. It should be `torch.Size([b1, b2])` for a `b1 x b2 x n x m` kernel output. @@ -51,9 +51,6 @@ class PiecewisePolynomialKernel(Kernel): :param eps: (Default: 1e-6) The minimum value that the lengthscale can take (prevents divide by zero errors). :type eps: float, optional - :var torch.Tensor lengthscale: The lengthscale parameter. Size/shape of parameter depends on the - :attr:`ard_num_dims` and :attr:`batch_shape` arguments. - .. 
_Rasmussen and Williams (2006): http://www.gaussianprocess.org/gpml/ diff --git a/gpytorch/kernels/polynomial_kernel.py b/gpytorch/kernels/polynomial_kernel.py index 405051bc9..3a98e8d4e 100644 --- a/gpytorch/kernels/polynomial_kernel.py +++ b/gpytorch/kernels/polynomial_kernel.py @@ -22,14 +22,14 @@ class PolynomialKernel(Kernel): where - * :math:`c` is an :attr:`offset` parameter. + * :math:`c` is an offset parameter. Args: - :attr:`offset_prior` (:class:`gpytorch.priors.Prior`): + offset_prior (:class:`gpytorch.priors.Prior`): Prior over the offset parameter (default `None`). - :attr:`offset_constraint` (Constraint, optional): + offset_constraint (Constraint, optional): Constraint to place on offset parameter. Default: `Positive`. - :attr:`active_dims` (list): + active_dims (list): List of data dimensions to operate on. `len(active_dims)` should equal `num_dimensions`. """ diff --git a/gpytorch/kernels/product_structure_kernel.py b/gpytorch/kernels/product_structure_kernel.py index b21fcd9f0..89fa87998 100644 --- a/gpytorch/kernels/product_structure_kernel.py +++ b/gpytorch/kernels/product_structure_kernel.py @@ -29,11 +29,11 @@ class ProductStructureKernel(Kernel): See `Product Kernel Interpolation for Scalable Gaussian Processes`_ for more detail. Args: - - :attr:`base_kernel` (Kernel): + base_kernel (Kernel): The kernel to approximate with KISS-GP - - :attr:`num_dims` (int): + num_dims (int): The dimension of the input data. - - :attr:`active_dims` (tuple of ints, optional): + active_dims (tuple of ints, optional): Passed down to the `base_kernel`. .. _Product Kernel Interpolation for Scalable Gaussian Processes: diff --git a/gpytorch/kernels/rbf_kernel.py b/gpytorch/kernels/rbf_kernel.py index 09b3b63a2..4e708f26b 100644 --- a/gpytorch/kernels/rbf_kernel.py +++ b/gpytorch/kernels/rbf_kernel.py @@ -21,7 +21,7 @@ class RBFKernel(Kernel): (\mathbf{x_1} - \mathbf{x_2})^\top \Theta^{-2} (\mathbf{x_1} - \mathbf{x_2}) \right) \end{equation*} - where :math:`\Theta` is a :attr:`lengthscale` parameter. + where :math:`\Theta` is a lengthscale parameter. See :class:`gpytorch.kernels.Kernel` for descriptions of the lengthscale options. .. note:: @@ -30,26 +30,26 @@ class RBFKernel(Kernel): decorate this kernel with a :class:`gpytorch.kernels.ScaleKernel`. Args: - :attr:`ard_num_dims` (int, optional): + ard_num_dims (int, optional): Set this if you want a separate lengthscale for each - input dimension. It should be `d` if :attr:`x1` is a `n x d` matrix. Default: `None` - :attr:`batch_shape` (torch.Size, optional): + input dimension. It should be `d` if x1 is a `n x d` matrix. Default: `None` + batch_shape (torch.Size, optional): Set this if you want a separate lengthscale for each - batch of input data. It should be `b` if :attr:`x1` is a `b x n x d` tensor. Default: `torch.Size([])`. - :attr:`active_dims` (tuple of ints, optional): + batch of input data. It should be `b` if x1 is a `b x n x d` tensor. Default: `torch.Size([])`. + active_dims (tuple of ints, optional): Set this if you want to compute the covariance of only a few input dimensions. The ints corresponds to the indices of the dimensions. Default: `None`. - :attr:`lengthscale_prior` (Prior, optional): + lengthscale_prior (Prior, optional): Set this if you want to apply a prior to the lengthscale parameter. Default: `None`. - :attr:`lengthscale_constraint` (Constraint, optional): + lengthscale_constraint (Constraint, optional): Set this if you want to apply a constraint to the lengthscale parameter. Default: `Positive`. 
- :attr:`eps` (float): + eps (float): The minimum value that the lengthscale can take (prevents divide by zero errors). Default: `1e-6`. Attributes: - :attr:`lengthscale` (Tensor): + lengthscale (Tensor): The lengthscale parameter. Size/shape of parameter depends on the - :attr:`ard_num_dims` and :attr:`batch_shape` arguments. + ard_num_dims and batch_shape arguments. Example: >>> x = torch.randn(10, 5) diff --git a/gpytorch/kernels/rbf_kernel_grad.py b/gpytorch/kernels/rbf_kernel_grad.py index 568bc280f..4e66e8d5e 100644 --- a/gpytorch/kernels/rbf_kernel_grad.py +++ b/gpytorch/kernels/rbf_kernel_grad.py @@ -19,23 +19,23 @@ class RBFKernelGrad(RBFKernel): decorate this kernel with a :class:`gpytorch.kernels.ScaleKernel`. Args: - :attr:`batch_shape` (torch.Size, optional): + batch_shape (torch.Size, optional): Set this if you want a separate lengthscale for each - batch of input data. It should be `b` if :attr:`x1` is a `b x n x d` tensor. Default: `torch.Size([])`. - :attr:`active_dims` (tuple of ints, optional): + batch of input data. It should be `b` if x1 is a `b x n x d` tensor. Default: `torch.Size([])`. + active_dims (tuple of ints, optional): Set this if you want to compute the covariance of only a few input dimensions. The ints corresponds to the indices of the dimensions. Default: `None`. - :attr:`lengthscale_prior` (Prior, optional): + lengthscale_prior (Prior, optional): Set this if you want to apply a prior to the lengthscale parameter. Default: `None`. - :attr:`lengthscale_constraint` (Constraint, optional): + lengthscale_constraint (Constraint, optional): Set this if you want to apply a constraint to the lengthscale parameter. Default: `Positive`. - :attr:`eps` (float): + eps (float): The minimum value that the lengthscale can take (prevents divide by zero errors). Default: `1e-6`. Attributes: - :attr:`lengthscale` (Tensor): + lengthscale (Tensor): The lengthscale parameter. Size/shape of parameter depends on the - :attr:`ard_num_dims` and :attr:`batch_shape` arguments. + ard_num_dims and batch_shape arguments. Example: >>> x = torch.randn(10, 5) diff --git a/gpytorch/kernels/rq_kernel.py b/gpytorch/kernels/rq_kernel.py index 6694831f0..2b13fe214 100644 --- a/gpytorch/kernels/rq_kernel.py +++ b/gpytorch/kernels/rq_kernel.py @@ -20,7 +20,7 @@ class RQKernel(Kernel): (\mathbf{x_1} - \mathbf{x_2})^\top \Theta^{-2} (\mathbf{x_1} - \mathbf{x_2}) \right)^{-\alpha} \end{equation*} - where :math:`\Theta` is a :attr:`lengthscale` parameter, and :math:`\alpha` is the + where :math:`\Theta` is a lengthscale parameter, and :math:`\alpha` is the rational quadratic relative weighting parameter. See :class:`gpytorch.kernels.Kernel` for descriptions of the lengthscale options. @@ -30,31 +30,31 @@ class RQKernel(Kernel): decorate this kernel with a :class:`gpytorch.kernels.ScaleKernel`. Args: - :attr:`ard_num_dims` (int, optional): + ard_num_dims (int, optional): Set this if you want a separate lengthscale for each - input dimension. It should be `d` if :attr:`x1` is a `n x d` matrix. Default: `None` - :attr:`batch_shape` (torch.Size, optional): + input dimension. It should be `d` if x1 is a `n x d` matrix. Default: `None` + batch_shape (torch.Size, optional): Set this if you want a separate lengthscale for each - batch of input data. It should be `b` if :attr:`x1` is a `b x n x d` tensor. Default: `torch.Size([])`. - :attr:`active_dims` (tuple of ints, optional): + batch of input data. It should be `b` if x1 is a `b x n x d` tensor. Default: `torch.Size([])`. 
+ active_dims (tuple of ints, optional): Set this if you want to compute the covariance of only a few input dimensions. The ints corresponds to the indices of the dimensions. Default: `None`. - :attr:`lengthscale_prior` (Prior, optional): + lengthscale_prior (Prior, optional): Set this if you want to apply a prior to the lengthscale parameter. Default: `None`. - :attr:`lengthscale_constraint` (Constraint, optional): + lengthscale_constraint (Constraint, optional): Set this if you want to apply a constraint to the lengthscale parameter. Default: `Positive`. - :attr:`alpha_constraint` (Constraint, optional): + alpha_constraint (Constraint, optional): Set this if you want to apply a constraint to the alpha parameter. Default: `Positive`. - :attr:`eps` (float): + eps (float): The minimum value that the lengthscale can take (prevents divide by zero errors). Default: `1e-6`. Attributes: - :attr:`lengthscale` (Tensor): + lengthscale (Tensor): The lengthscale parameter. Size/shape of parameter depends on the - :attr:`ard_num_dims` and :attr:`batch_shape` arguments. - :attr:`alpha` (Tensor): + ard_num_dims and batch_shape arguments. + alpha (Tensor): The rational quadratic relative weighting parameter. Size/shape of parameter depends - on the :attr:`batch_shape` argument + on the batch_shape argument """ has_lengthscale = True diff --git a/gpytorch/kernels/scale_kernel.py b/gpytorch/kernels/scale_kernel.py index 12f2823a8..a2a6fffda 100644 --- a/gpytorch/kernels/scale_kernel.py +++ b/gpytorch/kernels/scale_kernel.py @@ -27,25 +27,25 @@ class ScaleKernel(Kernel): keyword argument to the appropriate number of batches. .. note:: - The :attr:`outputscale` parameter is parameterized on a log scale to constrain it to be positive. - You can set a prior on this parameter using the :attr:`outputscale_prior` argument. + The outputscale parameter is parameterized on a log scale to constrain it to be positive. + You can set a prior on this parameter using the outputscale_prior argument. Args: - :attr:`base_kernel` (Kernel): + base_kernel (Kernel): The base kernel to be scaled. - :attr:`batch_shape` (int, optional): + batch_shape (int, optional): Set this if you want a separate outputscale for each batch of input data. It should be `b` - if :attr:`x1` is a `b x n x d` tensor. Default: `torch.Size([])` - :attr:`outputscale_prior` (Prior, optional): Set this if you want to apply a prior to the outputscale + if x1 is a `b x n x d` tensor. Default: `torch.Size([])` + outputscale_prior (Prior, optional): Set this if you want to apply a prior to the outputscale parameter. Default: `None` - :attr:`outputscale_constraint` (Constraint, optional): Set this if you want to apply a constraint to the + outputscale_constraint (Constraint, optional): Set this if you want to apply a constraint to the outputscale parameter. Default: `Positive`. Attributes: - :attr:`base_kernel` (Kernel): + base_kernel (Kernel): The kernel module to be scaled. - :attr:`outputscale` (Tensor): - The outputscale parameter. Size/shape of parameter depends on the :attr:`batch_shape` arguments. + outputscale (Tensor): + The outputscale parameter. Size/shape of parameter depends on the batch_shape arguments. Example: >>> x = torch.randn(10, 5) diff --git a/gpytorch/kernels/spectral_mixture_kernel.py b/gpytorch/kernels/spectral_mixture_kernel.py index 40f8ad1e8..e63185ff4 100644 --- a/gpytorch/kernels/spectral_mixture_kernel.py +++ b/gpytorch/kernels/spectral_mixture_kernel.py @@ -23,14 +23,14 @@ class SpectralMixtureKernel(Kernel): .. 
note:: Unlike other kernels, - * :attr:`ard_num_dims` **must equal** the number of dimensions of the data. + * ard_num_dims **must equal** the number of dimensions of the data. * This kernel should not be combined with a :class:`gpytorch.kernels.ScaleKernel`. :param int num_mixtures: The number of components in the mixture. :param int ard_num_dims: Set this to match the dimensionality of the input. - It should be `d` if :attr:`x1` is a `... x n x d` matrix. (Default: `1`.) + It should be `d` if x1 is a `... x n x d` matrix. (Default: `1`.) :param batch_shape: Set this if the data is batch of input data. It should - be `b_1 x ... x b_j` if :attr:`x1` is a `b_1 x ... x b_j x n x d` tensor. (Default: `torch.Size([])`.) + be `b_1 x ... x b_j` if x1 is a `b_1 x ... x b_j x n x d` tensor. (Default: `torch.Size([])`.) :type batch_shape: torch.Size, optional :param active_dims: Set this if you want to compute the covariance of only a few input dimensions. The ints corresponds to the indices of the dimensions. (Default: `None`.) @@ -38,17 +38,17 @@ class SpectralMixtureKernel(Kernel): :param eps: The minimum value that the lengthscale can take (prevents divide by zero errors). (Default: `1e-6`.) :type eps: float, optional - :param mixture_scales_prior: A prior to set on the :attr:`mixture_scales` parameter + :param mixture_scales_prior: A prior to set on the mixture_scales parameter :type mixture_scales_prior: ~gpytorch.priors.Prior, optional - :param mixture_scales_constraint: A constraint to set on the :attr:`mixture_scales` parameter + :param mixture_scales_constraint: A constraint to set on the mixture_scales parameter :type mixture_scales_constraint: ~gpytorch.constraints.Interval, optional - :param mixture_means_prior: A prior to set on the :attr:`mixture_means` parameter + :param mixture_means_prior: A prior to set on the mixture_means parameter :type mixture_means_prior: ~gpytorch.priors.Prior, optional - :param mixture_means_constraint: A constraint to set on the :attr:`mixture_means` parameter + :param mixture_means_constraint: A constraint to set on the mixture_means parameter :type mixture_means_constraint: ~gpytorch.constraints.Interval, optional - :param mixture_weights_prior: A prior to set on the :attr:`mixture_weights` parameter + :param mixture_weights_prior: A prior to set on the mixture_weights parameter :type mixture_weights_prior: ~gpytorch.priors.Prior, optional - :param mixture_weights_constraint: A constraint to set on the :attr:`mixture_weights` parameter + :param mixture_weights_constraint: A constraint to set on the mixture_weights parameter :type mixture_weights_constraint: ~gpytorch.constraints.Interval, optional :ivar torch.Tensor mixture_scales: The lengthscale parameter. Given diff --git a/gpytorch/lazy/block_diag_lazy_tensor.py b/gpytorch/lazy/block_diag_lazy_tensor.py index 439fa393f..95cacba82 100644 --- a/gpytorch/lazy/block_diag_lazy_tensor.py +++ b/gpytorch/lazy/block_diag_lazy_tensor.py @@ -13,15 +13,15 @@ class BlockDiagLazyTensor(BlockLazyTensor): """ Represents a lazy tensor that is the block diagonal of square matrices. - The :attr:`block_dim` attribute specifies which dimension of the base LazyTensor + The block_dim attribute specifies which dimension of the base LazyTensor specifies the blocks. For example, (with `block_dim=-3` a `k x n x n` tensor represents `k` `n x n` blocks (a `kn x kn` matrix). A `b x k x n x n` tensor represents `k` `b x n x n` blocks (a `b x kn x kn` batch matrix). 
Args: - :attr:`base_lazy_tensor` (LazyTensor or Tensor): + base_lazy_tensor (LazyTensor or Tensor): Must be at least 3 dimensional. - :attr:`block_dim` (int): + block_dim (int): The dimension that specifies the blocks. """ diff --git a/gpytorch/lazy/block_interleaved_lazy_tensor.py b/gpytorch/lazy/block_interleaved_lazy_tensor.py index 307c99200..0aadea0c5 100644 --- a/gpytorch/lazy/block_interleaved_lazy_tensor.py +++ b/gpytorch/lazy/block_interleaved_lazy_tensor.py @@ -9,15 +9,15 @@ class BlockInterleavedLazyTensor(BlockLazyTensor): """ Represents a lazy tensor that is the block diagonal of square matrices. - The :attr:`block_dim` attribute specifies which dimension of the base LazyTensor + The block_dim attribute specifies which dimension of the base LazyTensor specifies the blocks. For example, (with `block_dim=-3` a `k x n x n` tensor represents `k` `n x n` blocks (a `kn x kn` matrix). A `b x k x n x n` tensor represents `k` `b x n x n` blocks (a `b x kn x kn` batch matrix). Args: - :attr:`base_lazy_tensor` (LazyTensor or Tensor): + base_lazy_tensor (LazyTensor or Tensor): Must be at least 3 dimensional. - :attr:`block_dim` (int): + block_dim (int): The dimension that specifies the blocks. """ diff --git a/gpytorch/lazy/block_lazy_tensor.py b/gpytorch/lazy/block_lazy_tensor.py index 5b87f4d3e..277a1df51 100644 --- a/gpytorch/lazy/block_lazy_tensor.py +++ b/gpytorch/lazy/block_lazy_tensor.py @@ -16,15 +16,15 @@ class BlockLazyTensor(LazyTensor): (e.g. block diagonal, sum over blocks, etc.) BlockLazyTensors represent the groups of blocks as a batched Tensor. - The :attr:block_dim` attribute specifies which dimension of the base LazyTensor + The block_dim attribute specifies which dimension of the base LazyTensor specifies the blocks. For example, (with `block_dim=-3` a `k x n x n` tensor represents `k` `n x n` blocks. A `b x k x n x n` tensor represents `k` `b x n x n` blocks. Args: - - :attr:`base_lazy_tensor` (LazyTensor or Tensor): + base_lazy_tensor (LazyTensor or Tensor): Must be at least 3 dimenional. - - :attr:`block_dim` (int): + block_dim (int): The dimension that specifies blocks. """ diff --git a/gpytorch/lazy/cat_lazy_tensor.py b/gpytorch/lazy/cat_lazy_tensor.py index 24e56882f..21bf13de6 100644 --- a/gpytorch/lazy/cat_lazy_tensor.py +++ b/gpytorch/lazy/cat_lazy_tensor.py @@ -35,14 +35,14 @@ class CatLazyTensor(LazyTensor): dimension. Args: - - :attr:`lazy_tensors` (list of LazyTensors): + lazy_tensors (list of LazyTensors): A list of LazyTensors whose sizes are the same except in - concatenating dimension :attr:`dim` - - :attr:`dim` (int): + concatenating dimension dim + dim (int): The concatenating dimension which can be a batch dimension. - - :attr:`output_device` (torch.device): - The CatLazyTensor will appear to appear on :attr:`output_device` - and place any output `torch.Tensors` on :attr:`output_device` + output_device (torch.device): + The CatLazyTensor will appear to appear on output_device + and place any output `torch.Tensors` on output_device """ def _check_args(self, *lazy_tensors, dim=0, output_device=None): diff --git a/gpytorch/lazy/diag_lazy_tensor.py b/gpytorch/lazy/diag_lazy_tensor.py index c50643b01..cce6a635c 100644 --- a/gpytorch/lazy/diag_lazy_tensor.py +++ b/gpytorch/lazy/diag_lazy_tensor.py @@ -19,7 +19,7 @@ def __init__(self, diag): Diagonal lazy tensor. Supports arbitrary batch sizes. Args: - :attr:`diag` (Tensor): + diag (Tensor): A `b1 x ... x bk x n` Tensor, representing a `b1 x ... 
x bk`-sized batch of `n x n` diagonal matrices """ @@ -208,10 +208,10 @@ def __init__(self, diag_values, diag_shape): Used e.g. for adding jitter to matrices. Args: - :attr:`diag_values` (Tensor): + diag_values (Tensor): A `b1 x ... x bk x 1` Tensor, representing a `b1 x ... x bk`-sized batch of `diag_shape x diag_shape` diagonal matrices - :attr:`diag_shape` (int): + diag_shape (int): The (non-batch) dimension of the (square) matrix """ if settings.debug.on(): diff --git a/gpytorch/lazy/identity_lazy_tensor.py b/gpytorch/lazy/identity_lazy_tensor.py index 1300f1715..84426afa6 100644 --- a/gpytorch/lazy/identity_lazy_tensor.py +++ b/gpytorch/lazy/identity_lazy_tensor.py @@ -19,7 +19,7 @@ def __init__(self, diag_shape, batch_shape=torch.Size([]), dtype=None, device=No Identity matrix lazy tensor. Supports arbitrary batch sizes. Args: - :attr:`diag` (Tensor): + diag (Tensor): A `b1 x ... x bk x n` Tensor, representing a `b1 x ... x bk`-sized batch of `n x n` identity matrices """ diff --git a/gpytorch/lazy/lazy_tensor.py b/gpytorch/lazy/lazy_tensor.py index c477759bc..7bd664615 100644 --- a/gpytorch/lazy/lazy_tensor.py +++ b/gpytorch/lazy/lazy_tensor.py @@ -213,11 +213,11 @@ def _getitem(self, row_index, col_index, *batch_indices): handled by the `_getindices` method Args: - :attr:`row_index` (slice, Tensor): + row_index (slice, Tensor): Index for the row of the LazyTensor - :attr:`col_index` (slice, Tensor): + col_index (slice, Tensor): Index for the col of the LazyTensor - :attr:`batch_indices` (tuple of slice, int, Tensor): + batch_indices (tuple of slice, int, Tensor): Indices for the batch dimensions Returns: @@ -1178,9 +1178,9 @@ def inv_matmul(self, right_tensor, left_tensor=None): A^{-1} R, \end{equation} - where :math:`R` is :attr:`right_tensor` and :math:`A` is the LazyTensor. + where :math:`R` is right_tensor and :math:`A` is the LazyTensor. - If :attr:`left_tensor` is supplied, computes + If left_tensor is supplied, computes ... math:: @@ -1188,7 +1188,7 @@ def inv_matmul(self, right_tensor, left_tensor=None): L A^{-1} R, \end{equation} - where :math:`L` is :attr:`left_tensor`. Supplying this can reduce the number of + where :math:`L` is left_tensor. Supplying this can reduce the number of CG calls required. Args: @@ -1523,7 +1523,7 @@ def pivoted_cholesky(self, rank, error_tol=None, return_pivots=False): :param int rank: The size of the partial pivoted Cholesky factor. :param error_tol: Defines an optional stopping criterion. - If the residual of the factorization is less than :attr:`error_tol`, then the + If the residual of the factorization is less than error_tol, then the factorization will exit early. This will result in a :math:`\leq \text{ rank}` factor. :type error_tol: float, optional :param bool return_pivots: (default: False) Whether or not to return the pivots alongside @@ -1553,7 +1553,7 @@ def prod(self, dim=None): Returns a `b/k x n x m` LazyTensor. Args: - :attr:`mul_batch_size` (int or None): + mul_batch_size (int or None): Controls the number of groups that are multiplied over (default: None). Returns: @@ -1903,7 +1903,7 @@ def sum(self, dim=None): If set to None, then sums all dimensions Args: - :attr:`dim` (int): + dim (int): Which dimension is being summed over (default=None) Returns: @@ -1949,12 +1949,8 @@ def svd(self) -> Tuple["LazyTensor", Tensor, "LazyTensor"]: Does NOT sort the sigular values. Returns: - :obj:`~gpytorch.lazy.LazyTensor`: - The left singular vectors (`U`). - :obj:`torch.Tensor`: - The singular values (`S`). 
- :obj:`~gpytorch.lazy.LazyTensor`: - The right singular vectors (`V`). + Tuple containing the left singular vectors (`U`), the singular values (`S`), + and the right singular vectors (`V`). """ return self._svd() @@ -1966,13 +1962,11 @@ def symeig(self, eigenvectors: bool = False) -> Tuple[Tensor, Optional["LazyTens structure. Does NOT sort the eigenvalues. Args: - :attr:`eigenvectors` (bool): If True, compute the eigenvectors in addition to the eigenvalues. + eigenvectors (bool): If True, compute the eigenvectors in addition to the eigenvalues. Returns: - :obj:`torch.Tensor`: - The eigenvalues. - :obj:`~gpytorch.lazy.LazyTensor`: - The eigenvectors. If `eigenvectors=False`, this is None. Otherwise, this LazyTensor - contains the orthonormal eigenvectors of the matrix. + Tuple containing the eigenvalues and eigenvectors. If `eigenvectors=False`, + this is None. Otherwise, this LazyTensor contains the orthonormal eigenvectors + of the matrix. """ try: evals, evecs = pop_from_cache(self, "symeig", eigenvectors=True) @@ -2099,7 +2093,7 @@ def zero_mean_mvn_samples(self, num_samples): Self should be symmetric, either (batch_size x num_dim x num_dim) or (num_dim x num_dim) Args: - :attr:`num_samples` (int): + num_samples (int): Number of samples to draw. Returns: @@ -2150,7 +2144,7 @@ def __add__(self, other): or lazy tensor. Args: - :attr:`other` (:obj:`torch.tensor` or :obj:`gpytorch.lazy.LazyTensor`): + other (:obj:`torch.tensor` or :obj:`gpytorch.lazy.LazyTensor`): Matrix to add to this one. Returns: @@ -2187,7 +2181,7 @@ def __div__(self, other): the elementwise reciprocal of another matrix or lazy tensor. Args: - :attr:`other` (:obj:`torch.tensor` or :obj:`gpytorch.lazy.LazyTensor`): + other (:obj:`torch.tensor` or :obj:`gpytorch.lazy.LazyTensor`): Matrix to divide this one by. Returns: diff --git a/gpytorch/lazy/sum_batch_lazy_tensor.py b/gpytorch/lazy/sum_batch_lazy_tensor.py index 7956a5503..ae59f87ed 100644 --- a/gpytorch/lazy/sum_batch_lazy_tensor.py +++ b/gpytorch/lazy/sum_batch_lazy_tensor.py @@ -10,15 +10,15 @@ class SumBatchLazyTensor(BlockLazyTensor): """ Represents a lazy tensor that is actually the sum of several lazy tensors blocks. - The :attr:`block_dim` attribute specifies which dimension of the base LazyTensor + The block_dim attribute specifies which dimension of the base LazyTensor specifies the blocks. For example, (with `block_dim=-3` a `k x n x n` tensor represents `k` `n x n` blocks (a `n x n` matrix). A `b x k x n x n` tensor represents `k` `b x n x n` blocks (a `b x n x n` batch matrix). Args: - :attr:`base_lazy_tensor` (LazyTensor): + base_lazy_tensor (LazyTensor): A `k x n x n` LazyTensor, or a `b x k x n x n` LazyTensor. - :attr:`block_dim` (int): + block_dim (int): The dimension that specifies the blocks. """ diff --git a/gpytorch/lazy/toeplitz_lazy_tensor.py b/gpytorch/lazy/toeplitz_lazy_tensor.py index ca1170d60..0b2f0e57a 100644 --- a/gpytorch/lazy/toeplitz_lazy_tensor.py +++ b/gpytorch/lazy/toeplitz_lazy_tensor.py @@ -10,7 +10,7 @@ class ToeplitzLazyTensor(LazyTensor): def __init__(self, column): """ Args: - :attr: `column` (Tensor) + column (Tensor) If `column` is a 1D Tensor of length `n`, this represents a Toeplitz matrix with `column` as its first column. If `column` is `b_1 x b_2 x ... 
x b_k x n`, then this represents a batch diff --git a/gpytorch/lazy/triangular_lazy_tensor.py b/gpytorch/lazy/triangular_lazy_tensor.py index 91e2c4087..b7a80791e 100644 --- a/gpytorch/lazy/triangular_lazy_tensor.py +++ b/gpytorch/lazy/triangular_lazy_tensor.py @@ -27,10 +27,10 @@ def __init__(self, tensor: Allsor, upper: bool = False) -> None: Triangular lazy tensor. Supports arbitrary batch sizes. Args: - :attr:`tensor` (Tensor or LazyTensor): + tensor (Tensor or LazyTensor): A `b1 x ... x bk x n x n` Tensor, representing a `b1 x ... x bk`-sized batch of `n x n` triangular matrices. - :attr:`upper` (bool): + upper (bool): If True, the tensor is considered to be upper-triangular, otherwise lower-triangular. """ if isinstance(tensor, TriangularLazyTensor): diff --git a/gpytorch/likelihoods/likelihood.py b/gpytorch/likelihoods/likelihood.py index 63430204c..30d6c5269 100644 --- a/gpytorch/likelihoods/likelihood.py +++ b/gpytorch/likelihoods/likelihood.py @@ -96,7 +96,7 @@ class Likelihood(_Likelihood): \end{cases} In either case, to implement a likelihood function, GPyTorch only - requires a :attr:`forward` method that computes the conditional distribution + requires a forward method that computes the conditional distribution :math:`p(y \mid f(\mathbf x))`. Calling this object does one of two things: @@ -191,7 +191,7 @@ def forward(self, function_samples, *args, data={}, **kwargs): :type data: dict {str: torch.Tensor}, optional - Pyro integration only :param args: Additional args :param kwargs: Additional kwargs - :rtype: :obj:`Distribution` (with same shape as :attr:`function_samples` ) + :rtype: :obj:`Distribution` (with same shape as function_samples ) """ raise NotImplementedError @@ -228,7 +228,7 @@ def marginal(self, function_dist, *args, **kwargs): With both exact inference and variational inference, the form of :math:`p(\mathbf f|\mathcal D, \mathbf x)` or :math:`p(\mathbf f| - \mathbf x)` should usually be Gaussian. As a result, :attr:`function_dist` + \mathbf x)` should usually be Gaussian. As a result, function_dist should usually be a :obj:`~gpytorch.distributions.MultivariateNormal` specified by the mean and (co)variance of :math:`p(\mathbf f|...)`. diff --git a/gpytorch/mlls/_approximate_mll.py b/gpytorch/mlls/_approximate_mll.py index c5f4f2449..530019e4c 100644 --- a/gpytorch/mlls/_approximate_mll.py +++ b/gpytorch/mlls/_approximate_mll.py @@ -10,22 +10,22 @@ class _ApproximateMarginalLogLikelihood(MarginalLogLikelihood, ABC): r""" An approximate marginal log likelihood (typically a bound) for approximate GP models. - We expect that :attr:`model` is a :obj:`gpytorch.models.ApproximateGP`. + We expect that model is a :obj:`gpytorch.models.ApproximateGP`. Args: - :attr:`likelihood` (:obj:`gpytorch.likelihoods.Likelihood`): + likelihood (:obj:`gpytorch.likelihoods.Likelihood`): The likelihood for the model - :attr:`model` (:obj:`gpytorch.models.ApproximateGP`): + model (:obj:`gpytorch.models.ApproximateGP`): The approximate GP model - :attr:`num_data` (int): + num_data (int): The total number of training data points (necessary for SGD) - :attr:`beta` (float - default 1.): + beta (float - default 1.): A multiplicative factor for the KL divergence term. Setting it to 1 (default) recovers true variational inference (as derived in `Scalable Variational Gaussian Process Classification`_). Setting it to anything less than 1 reduces the regularization effect of the model (similarly to what was proposed in `the beta-VAE paper`_). 
- :attr:`combine_terms` (bool): + combine_terms (bool): Whether or not to sum the expected NLL with the KL terms (default True) """ @@ -45,12 +45,13 @@ def forward(self, approximate_dist_f, target, **kwargs): Calling this function will call the likelihood's `expected_log_prob` function. Args: - :attr:`approximate_dist_f` (:obj:`gpytorch.distributions.MultivariateNormal`): + approximate_dist_f (:obj:`gpytorch.distributions.MultivariateNormal`): :math:`q(\mathbf f)` the outputs of the latent function (the :obj:`gpytorch.models.ApproximateGP`) - :attr:`target` (`torch.Tensor`): + target (`torch.Tensor`): :math:`\mathbf y` The target values - :attr:`**kwargs`: - Additional arguments passed to the likelihood's `expected_log_prob` function. + + Keyword Args: + Additional arguments passed to the likelihood's `expected_log_prob` function. """ # Get likelihood term and KL term num_batch = approximate_dist_f.event_shape[0] diff --git a/gpytorch/mlls/leave_one_out_pseudo_likelihood.py b/gpytorch/mlls/leave_one_out_pseudo_likelihood.py index 6252515f6..e89ddb3e6 100644 --- a/gpytorch/mlls/leave_one_out_pseudo_likelihood.py +++ b/gpytorch/mlls/leave_one_out_pseudo_likelihood.py @@ -52,7 +52,7 @@ def forward(self, function_dist: MultivariateNormal, target: Tensor, *params) -> :param ~gpytorch.distributions.MultivariateNormal output: the outputs of the latent function (the :obj:`~gpytorch.models.GP`) :param torch.Tensor target: :math:`\mathbf y` The target values - :param dict kwargs: Additional arguments to pass to the likelihood's :attr:`forward` function. + :param dict kwargs: Additional arguments to pass to the likelihood's forward function. """ output = self.likelihood(function_dist, *params) m, L = output.mean, output.lazy_covariance_matrix.cholesky(upper=False) diff --git a/gpytorch/mlls/marginal_log_likelihood.py b/gpytorch/mlls/marginal_log_likelihood.py index be696c9c8..ab5dc4c7b 100644 --- a/gpytorch/mlls/marginal_log_likelihood.py +++ b/gpytorch/mlls/marginal_log_likelihood.py @@ -43,6 +43,6 @@ def forward(self, output, target, **kwargs): :param ~gpytorch.distributions.MultivariateNormal output: the outputs of the latent function (the :obj:`~gpytorch.models.GP`) :param torch.Tensor target: :math:`\mathbf y` The target values - :param dict kwargs: Additional arguments to pass to the likelihood's :attr:`forward` function. + :param dict kwargs: Additional arguments to pass to the likelihood's forward function. """ raise NotImplementedError diff --git a/gpytorch/models/exact_prediction_strategies.py b/gpytorch/models/exact_prediction_strategies.py index 94ba7f8ab..77c578943 100644 --- a/gpytorch/models/exact_prediction_strategies.py +++ b/gpytorch/models/exact_prediction_strategies.py @@ -83,7 +83,7 @@ def _exact_predictive_covar_inv_quad_form_cache(self, train_train_covar_inv_root test_train_covar (:obj:`torch.tensor`): the observed noise (from the likelihood) Returns - - A precomputed cache + A precomputed cache """ res = train_train_covar_inv_root if settings.detach_test_caches.on(): @@ -120,19 +120,18 @@ def get_fantasy_strategy(self, inputs, targets, full_inputs, full_targets, full_ GP model, use the :meth:`~gpytorch.models.ExactGP.get_fantasy_model` method. Args: - - :attr:`inputs` (Tensor `b1 x ... x bk x m x d` or `f x b1 x ... x bk x m x d`): Locations of fantasy + inputs (Tensor `b1 x ... x bk x m x d` or `f x b1 x ... x bk x m x d`): Locations of fantasy observations. - - :attr:`targets` (Tensor `b1 x ... x bk x m` or `f x b1 x ... x bk x m`): Labels of fantasy observations. 
- - :attr:`full_inputs` (Tensor `b1 x ... x bk x n+m x d` or `f x b1 x ... x bk x n+m x d`): Training data + targets (Tensor `b1 x ... x bk x m` or `f x b1 x ... x bk x m`): Labels of fantasy observations. + full_inputs (Tensor `b1 x ... x bk x n+m x d` or `f x b1 x ... x bk x n+m x d`): Training data concatenated with fantasy inputs - - :attr:`full_targets` (Tensor `b1 x ... x bk x n+m` or `f x b1 x ... x bk x n+m`): Training labels + full_targets (Tensor `b1 x ... x bk x n+m` or `f x b1 x ... x bk x n+m`): Training labels concatenated with fantasy labels. - - :attr:`full_output` (:class:`gpytorch.distributions.MultivariateNormal`): Prior called on full_inputs + full_output (:class:`gpytorch.distributions.MultivariateNormal`): Prior called on full_inputs Returns: - - :class:`DefaultPredictionStrategy` - A `DefaultPredictionStrategy` model with `n + m` training examples, where the `m` fantasy examples have - been added and all test-time caches have been updated. + A `DefaultPredictionStrategy` model with `n + m` training examples, where the `m` fantasy examples have + been added and all test-time caches have been updated. """ full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix diff --git a/gpytorch/models/model_list.py b/gpytorch/models/model_list.py index 3a1ff5931..b122777fd 100644 --- a/gpytorch/models/model_list.py +++ b/gpytorch/models/model_list.py @@ -56,13 +56,12 @@ def get_fantasy_model(self, inputs, targets, **kwargs): and returns the same class of fantasy models. Args: - - :attr:`inputs`: List of locations of fantasy observations, one for each model. - - :attr:`targets` List of labels of fantasy observations, one for each model. + inputs: List of locations of fantasy observations, one for each model. + targets List of labels of fantasy observations, one for each model. Returns: - - :class:`IndependentModelList` - An `IndependentModelList` model, where each sub-model is the fantasy model of the respective - sub-model in the original model at the corresponding input locations / labels. + An `IndependentModelList` model, where each sub-model is the fantasy model of the respective + sub-model in the original model at the corresponding input locations / labels. """ if "noise" in kwargs: diff --git a/gpytorch/models/pyro/pyro_gp.py b/gpytorch/models/pyro/pyro_gp.py index 460d34eca..12c0527e4 100644 --- a/gpytorch/models/pyro/pyro_gp.py +++ b/gpytorch/models/pyro/pyro_gp.py @@ -18,16 +18,16 @@ class PyroGP(GP, _PyroMixin): See `the Pyro examples `_ for detailed examples. Args: - :attr:`variational_strategy` (:obj:`~gpytorch.variational.VariationalStrategy`): + variational_strategy (:obj:`~gpytorch.variational.VariationalStrategy`): The variational strategy that defines the variational distribution and the marginalization strategy. - :attr:`likelihood` (:obj:`~gpytorch.likelihoods.Likelihood`): + likelihood (:obj:`~gpytorch.likelihoods.Likelihood`): The likelihood for the model - :attr:`num_data` (int): + num_data (int): The total number of training data points (necessary for SGD) - :attr:`name_prefix` (str, optional): + name_prefix (str, optional): A prefix to put in front of pyro sample/plate sites - :attr:`beta` (float - default 1.): + beta (float - default 1.): A multiplicative factor for the KL divergence term. Setting it to 1 (default) recovers true variational inference (as derived in `Scalable Variational Gaussian Process Classification`_). 
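The `num_data` and `beta` arguments documented above (for the approximate marginal log likelihood and for PyroGP) are easiest to see in use. The following is a minimal, self-contained sketch — not part of this patch — using `VariationalELBO`, a concrete subclass of `_ApproximateMarginalLogLikelihood`; the toy data and model are invented purely for illustration:

    import torch
    import gpytorch

    # Toy training data, invented for this sketch only.
    train_x = torch.linspace(0, 1, 20).unsqueeze(-1)
    train_y = torch.sin(6 * train_x).squeeze(-1)

    class ToyApproximateGP(gpytorch.models.ApproximateGP):
        def __init__(self, inducing_points):
            variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
                inducing_points.size(-2)
            )
            variational_strategy = gpytorch.variational.VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            )
            super().__init__(variational_strategy)
            self.mean_module = gpytorch.means.ConstantMean()
            self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

        def forward(self, x):
            return gpytorch.distributions.MultivariateNormal(
                self.mean_module(x), self.covar_module(x)
            )

    model = ToyApproximateGP(train_x[:5])
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    # num_data is the total training set size (needed for minibatch SGD);
    # beta=1.0 recovers the standard variational ELBO.
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0), beta=1.0)
    loss = -mll(model(train_x), train_y)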
diff --git a/gpytorch/module.py b/gpytorch/module.py
index 3a583798e..0cf0af6dc 100644
--- a/gpytorch/module.py
+++ b/gpytorch/module.py
@@ -190,9 +190,9 @@ def register_parameter(self, name, parameter):
 Adds a parameter to the module. The parameter can be accessed as an attribute using the given name.
 Args:
-    :attr:`name` (str):
+    name (str):
         The name of the parameter
-    :attr:`parameter` (torch.nn.Parameter):
+    parameter (torch.nn.Parameter):
         The parameter
 """
 if "_parameters" not in self.__dict__:
@@ -204,11 +204,11 @@ def register_prior(self, name, prior, param_or_closure, setting_closure=None):
 Adds a prior to the module. The prior can be accessed as an attribute using the given name.
 Args:
-    :attr:`name` (str):
+    name (str):
         The name of the prior
-    :attr:`prior` (Prior):
+    prior (Prior):
         The prior to be registered`
-    :attr:`param_or_closure` (string or callable):
+    param_or_closure (string or callable):
         Either the name of the parameter, or a closure (which upon calling evalutes a function on
         the module instance and one or more parameters):
         single parameter without a transform: `.register_prior("foo_prior", foo_prior, "foo_param")`
@@ -216,7 +216,7 @@
         `.register_prior("foo_prior", NormalPrior(0, 1), lambda module: torch.log(module.foo_param))`
         function of multiple parameters:
         `.register_prior("foo2_prior", foo2_prior, lambda module: f(module.param1, module.param2)))`
-    :attr:`setting_closure` (callable, optional):
+    setting_closure (callable, optional):
         A function taking in the module instance and a tensor in (transformed) parameter space,
         initializing the internal parameter representation to the proper value by applying the
         inverse transform. Enables setting parametres directly in the transformed space, as well
@@ -408,7 +408,7 @@ def pyro_load_from_samples(self, samples_dict):
 the prior to properly set the unconstrained parameter.
 Args:
-    :attr:`samples_dict` (dict): Dictionary mapping *prior names* to sample values.
+    samples_dict (dict): Dictionary mapping *prior names* to sample values.
 """
 return _pyro_load_from_samples(module=self, samples_dict=samples_dict, memo=None, prefix="")
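As an illustration of the two registration methods documented above, here is a small sketch; the module name, the `raw_foo` parameter, and the softplus transform are hypothetical choices and assume only the `register_parameter` / `register_prior` signatures shown in the docstrings::

    # A hypothetical module: register a raw parameter, then register a prior on a
    # transformed view of it, with a setting_closure applying the inverse transform.
    import torch
    import gpytorch
    from gpytorch.priors import NormalPrior

    class ToyModule(gpytorch.Module):
        def __init__(self):
            super().__init__()
            self.register_parameter("raw_foo", torch.nn.Parameter(torch.zeros(1)))
            self.register_prior(
                "foo_prior",
                NormalPrior(0.0, 1.0),
                lambda module: torch.nn.functional.softplus(module.raw_foo),                  # param_or_closure
                lambda module, value: module.initialize(raw_foo=torch.log(torch.expm1(value))),  # setting_closure
            )

    module = ToyModule()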
diff --git a/gpytorch/settings.py b/gpytorch/settings.py
index cf3a698c7..c85579e1e 100644
--- a/gpytorch/settings.py
+++ b/gpytorch/settings.py
@@ -311,7 +311,7 @@ class fast_computations:
 functions used in GP inference.
 The functions that can be controlled are:
-    * :attr:`covar_root_decomposition`
+    * covar_root_decomposition
         This feature flag controls how matrix root decompositions
         (:math:`K = L L^\top`) are computed (e.g. for sampling, computing caches, etc.).
@@ -323,7 +323,7 @@ class fast_computations:
         * If set to False, covariance matrices :math:`K` are decomposed
          using the Cholesky decomposition.
-    * :attr:`log_prob`
+    * log_prob
         This feature flag controls how GPyTorch computes the marginal log likelihood for exact GPs
         and `log_prob` for multivariate normal distributions
@@ -336,7 +336,7 @@ class fast_computations:
         * If set to False, `log_prob` is computed using the Cholesky decomposition.
-    * :attr:`fast_solves`
+    * fast_solves
         This feature flag controls how GPyTorch computes the solves of positive-definite matrices.
         * If set to True,
diff --git a/gpytorch/utils/cholesky.py b/gpytorch/utils/cholesky.py
index 2d2d3001f..135cc5c3c 100644
--- a/gpytorch/utils/cholesky.py
+++ b/gpytorch/utils/cholesky.py
@@ -50,16 +50,16 @@ def _psd_safe_cholesky(A, out=None, jitter=None, max_tries=None):
 def psd_safe_cholesky(A, upper=False, out=None, jitter=None, max_tries=None):
     """Compute the Cholesky decomposition of A. If A is only p.s.d, add a small jitter to the diagonal.
     Args:
-        :attr:`A` (Tensor):
+        A (Tensor):
             The tensor to compute the Cholesky decomposition of
-        :attr:`upper` (bool, optional):
+        upper (bool, optional):
            See torch.cholesky
-        :attr:`out` (Tensor, optional):
+        out (Tensor, optional):
            See torch.cholesky
-        :attr:`jitter` (float, optional):
+        jitter (float, optional):
            The jitter to add to the diagonal of A in case A is only p.s.d. If omitted,
            uses settings.cholesky_jitter.value()
-        :attr:`max_tries` (int, optional):
+        max_tries (int, optional):
            Number of attempts (with successively increasing jitter) to make before raising an error.
     """
     L = _psd_safe_cholesky(A, out=out, jitter=jitter, max_tries=max_tries)
diff --git a/gpytorch/utils/permutation.py b/gpytorch/utils/permutation.py
index a112cda7a..53bba5a99 100644
--- a/gpytorch/utils/permutation.py
+++ b/gpytorch/utils/permutation.py
@@ -20,12 +20,12 @@ def apply_permutation(
     \end{equation}
 
     where the permutation matrices :math:`\boldsymbol{\Pi}_\text{left}` and :math:`\boldsymbol{\Pi}_\text{right}^\top`
-    are represented by vectors :attr:`left_permutation` and :attr:`right_permutation`.
+    are represented by vectors left_permutation and right_permutation.
     The permutation matrices may be partial permutations (only selecting a subset of rows/columns)
     or full permutations (permuting all rows/columns).
 
-    Importantly, if :math:`\mathbf K` is a batch of matrices, :attr:`left_permutation` and :attr:`right_permutation`
+    Importantly, if :math:`\mathbf K` is a batch of matrices, left_permutation and right_permutation
     can be a batch of permutation vectors, and this function will apply the appropriate permutation
     to each batch entry. Broadcasting rules apply.
diff --git a/gpytorch/variational/_variational_strategy.py b/gpytorch/variational/_variational_strategy.py
index 779d1fc04..de4dae08b 100644
--- a/gpytorch/variational/_variational_strategy.py
+++ b/gpytorch/variational/_variational_strategy.py
@@ -105,7 +105,7 @@ def forward(self, x, inducing_points, inducing_values, variational_inducing_cova
     (or the mean of the distribution :math:`q(\mathbf u)` if q is a Gaussian.
     :param ~gpytorch.lazy.LazyTensor variational_inducing_covar: If the distribuiton :math:`q(\mathbf u)`
         is Gaussian, then this variable is the covariance matrix of that Gaussian. Otherwise, it will be
-        :attr:`None`.
+        None.
     :rtype: :obj:`~gpytorch.distributions.MultivariateNormal`
     :return: The distribution :math:`q( \mathbf f(\mathbf X))`
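A short sketch of the jittered Cholesky helper and the `fast_computations` flags documented above; the matrix here is deliberately rank-deficient, and the specific jitter and flag values are illustrative only::

    # psd_safe_cholesky retries with increasing diagonal jitter when a plain
    # Cholesky factorization of a merely positive semi-definite matrix fails.
    import torch
    import gpytorch
    from gpytorch.utils.cholesky import psd_safe_cholesky

    x = torch.randn(5, 2)
    A = x @ x.transpose(-1, -2)                          # 5 x 5 PSD matrix of rank 2
    L = psd_safe_cholesky(A, jitter=1e-6, max_tries=5)   # lower-triangular by default
    print(torch.dist(L @ L.transpose(-1, -2), A))        # small, up to the added jitter

    # The fast_computations flags can be toggled locally as a context manager,
    # e.g. to force Cholesky-based paths for a single computation.
    with gpytorch.settings.fast_computations(covar_root_decomposition=False, log_prob=False):
        pass  # GP calls placed here would avoid the iterative (Lanczos/CG) routines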
diff --git a/gpytorch/variational/batch_decoupled_variational_strategy.py b/gpytorch/variational/batch_decoupled_variational_strategy.py
index c9fc52116..40cb86d2f 100644
--- a/gpytorch/variational/batch_decoupled_variational_strategy.py
+++ b/gpytorch/variational/batch_decoupled_variational_strategy.py
@@ -42,11 +42,11 @@ class BatchDecoupledVariationalStrategy(VariationalStrategy):
     Additionally, you can use a different set of kernel hyperparameters for the mean and the variance function.
     We recommend using this feature only with the :obj:`~gpytorch.mlls.PredictiveLogLikelihood` objective function
     as proposed in "Parametric Gaussian Process Regressors" (`Jankowiak et al. (2020)`_).
-    Use the :attr:`mean_var_batch_dim` to indicate which batch dimension corresponds to the different mean/var
+    Use the mean_var_batch_dim to indicate which batch dimension corresponds to the different mean/var
     kernels.
 
     .. note::
-        We recommend using the "right-most" batch dimension (i.e. :attr:`mean_var_batch_dim=-1`) for the dimension
+        We recommend using the "right-most" batch dimension (i.e. ``mean_var_batch_dim=-1``) for the dimension
         that corresponds to the different mean/variance kernel parameters.
 
     Assuming you want `b1` many independent GPs, the :obj:`~gpytorch.variational._VariationalDistribution`
diff --git a/gpytorch/variational/independent_multitask_variational_strategy.py b/gpytorch/variational/independent_multitask_variational_strategy.py
index 9fbb461c3..0e4c89b5b 100644
--- a/gpytorch/variational/independent_multitask_variational_strategy.py
+++ b/gpytorch/variational/independent_multitask_variational_strategy.py
@@ -24,7 +24,7 @@ class IndependentMultitaskVariationalStrategy(_VariationalStrategy):
     dimensions corresponds to the multiple tasks.
 
     :param ~gpytorch.variational.VariationalStrategy base_variational_strategy: Base variational strategy
-    :param int num_tasks: Number of tasks. Should correspond to the batch size of :attr:`task_dim`.
+    :param int num_tasks: Number of tasks. Should correspond to the batch size of task_dim.
     :param int task_dim: (Default: -1) Which batch dimension is the task dimension
     """
@@ -102,7 +102,7 @@ class MultitaskVariationalStrategy(IndependentMultitaskVariationalStrategy):
     dimensions corresponds to the multiple tasks.
 
     :param ~gpytorch.variational.VariationalStrategy base_variational_strategy: Base variational strategy
-    :param int num_tasks: Number of tasks. Should correspond to the batch size of :attr:`task_dim`.
+    :param int num_tasks: Number of tasks. Should correspond to the batch size of task_dim.
     :param int task_dim: (Default: -1) Which batch dimension is the task dimension
     """
diff --git a/gpytorch/variational/lmc_variational_strategy.py b/gpytorch/variational/lmc_variational_strategy.py
index 22657b535..7690c8fab 100644
--- a/gpytorch/variational/lmc_variational_strategy.py
+++ b/gpytorch/variational/lmc_variational_strategy.py
@@ -157,11 +157,11 @@ def __call__(self, x, task_indices=None, prior=False, **kwargs):
     There are two modes:
 
     1. Compute **all tasks** for all inputs.
-       If this is the case, the :attr:`task_indices` attribute should be None.
+       If this is the case, the task_indices attribute should be None.
        The return type will be a (... x N x num_tasks)
        :class:`~gpytorch.distributions.MultitaskMultivariateNormal`.
 
     2. Compute **one task** per inputs.
-       If this is the case, the (... x N) :attr:`task_indices` tensor should contain
+       If this is the case, the (... x N) task_indices tensor should contain
        the indices of each input's assigned task. The return type will be a (... x N)
        :class:`~gpytorch.distributions.MultivariateNormal`.
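To ground the two calling modes described for the LMC strategy, here is a compact sketch modeled on the usual GPyTorch multitask-SVGP pattern; the toy model, sizes, and tensors (`ToyLMCModel`, `num_latents`, `task_indices`) are hypothetical and the pass-through of ``task_indices`` via the model call assumes the standard `ApproximateGP.__call__` kwarg forwarding::

    # Mode 1 returns a MultitaskMultivariateNormal over all tasks; mode 2 takes a
    # task_indices tensor and returns a plain MultivariateNormal, one task per input.
    import torch
    import gpytorch

    num_latents, num_tasks = 3, 4

    class ToyLMCModel(gpytorch.models.ApproximateGP):
        def __init__(self):
            inducing_points = torch.rand(num_latents, 16, 1)
            variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
                inducing_points.size(-2), batch_shape=torch.Size([num_latents])
            )
            variational_strategy = gpytorch.variational.LMCVariationalStrategy(
                gpytorch.variational.VariationalStrategy(
                    self, inducing_points, variational_distribution, learn_inducing_locations=True
                ),
                num_tasks=num_tasks,
                num_latents=num_latents,
                latent_dim=-1,
            )
            super().__init__(variational_strategy)
            self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_latents]))
            self.covar_module = gpytorch.kernels.ScaleKernel(
                gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents])),
                batch_shape=torch.Size([num_latents]),
            )

        def forward(self, x):
            return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))

    model = ToyLMCModel()
    x = torch.rand(10, 1)
    all_tasks = model(x)                                  # 10 x 4 MultitaskMultivariateNormal
    task_indices = torch.randint(0, num_tasks, (10,))
    one_task_each = model(x, task_indices=task_indices)   # length-10 MultivariateNormal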