[DOC] Fix doc for nn.Embedding, nn.Dense and nd.Embedding (apache#15869)

eric-haibin-lin · gyshi · commit cdf4967f9918 · 2019-09-07T21:18:43.000+08:00
* Update basic_layers.py

* Update indexing_op.cc
diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py
@@ -151,8 +151,9 @@ class Dense(HybridBlock):
     created by the layer, and `bias` is a bias vector created by the layer
     (only applicable if `use_bias` is `True`).
 
-    Note: the input must be a tensor with rank 2. Use `flatten` to convert it
-    to rank 2 manually if necessary.
+    .. note::
+        The input must be a tensor with rank 2. Use `flatten` to convert it
+        to rank 2 manually if necessary.
 
     Parameters
     ----------
@@ -379,11 +380,13 @@ class Embedding(HybridBlock):
     r"""Turns non-negative integers (indexes/tokens) into dense vectors
     of fixed size. eg. [4, 20] -> [[0.25, 0.1], [0.6, -0.2]]
 
-    Note: if `sparse_grad` is set to True, the gradient w.r.t weight will be
-    sparse. Only a subset of optimizers support sparse gradients, including SGD, AdaGrad
-    and Adam. By default lazy updates is turned on, which may perform differently
-    from standard updates. For more details, please check the Optimization API at:
-    https://mxnet.incubator.apache.org/api/python/optimization/optimization.html
+    .. note::
+        If `sparse_grad` is set to True, the gradient w.r.t. weight will be
+        sparse. Only a subset of optimizers support sparse gradients, including SGD,
+        AdaGrad and Adam. By default, lazy updates are turned on, which may perform
+        differently from standard updates. For more details, please check the
+        Optimization API at:
+        https://mxnet.incubator.apache.org/api/python/optimization/optimization.html
 
     Parameters
     ----------
diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc
@@ -490,8 +490,9 @@ All the input values should be integers in the range [0, input_dim).
 If the input_dim is ip0 and output_dim is op0, then shape of the embedding weight matrix must be
 (ip0, op0).
 
-By default, if any index mentioned is too large, it is replaced by the index that addresses
-the last vector in an embedding matrix.
+When "sparse_grad" is False, any index that is too large is replaced by the index that
+addresses the last vector in the embedding matrix.
+When "sparse_grad" is True, an error will be raised if invalid indices are found.
 
 Examples::