
Commit 7a928be

Merge branch 'temp'

2 parents: aa3c416 + 42a47b1


53 files changed: +1501 / -487 lines

CODEOWNERS

Lines changed: 2 additions & 3 deletions

```diff
@@ -42,8 +42,8 @@
 /plugin/ @pllarroy

 # CMake
-CMakeLists.txt @szha @rahul003 @pllarroy
-/cmake/ @szha @rahul003 @pllarroy
+CMakeLists.txt @szha @pllarroy
+/cmake/ @szha @pllarroy

 # MXNet CI
 dev_menu.py @pllarroy
@@ -71,4 +71,3 @@ prepare_mkl.sh @szha

 # Github templates
 /.github/ @szha
-
```

Makefile

Lines changed: 2 additions & 1 deletion

```diff
@@ -190,6 +190,7 @@ endif

 ifeq ($(USE_OPENMP), 1)
 	CFLAGS += -fopenmp
+	CFLAGS += -DMXNET_USE_OPENMP=1
 endif

 ifeq ($(USE_NNPACK), 1)
@@ -252,7 +253,7 @@ ifeq ($(USE_CUDNN), 1)
 	LDFLAGS += -lcudnn
 endif

-ifeq ($(USE_BLAS), open)
+ifeq ($(USE_BLAS), openblas)
 	CFLAGS += -DMXNET_USE_BLAS_OPEN=1
 else ifeq ($(USE_BLAS), atlas)
 	CFLAGS += -DMXNET_USE_BLAS_ATLAS=1
```
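To double-check that these flags took effect in a build, MXNet's runtime feature introspection can be used from Python; a minimal sketch, assuming the `mxnet.runtime.Features` API (added around 1.5.0) and the libinfo feature names `OPENMP` and `BLAS_OPEN`:

```python
# Sketch: check from Python which compile-time flags took effect.
# Assumes mxnet.runtime.Features (MXNet >= 1.5.0) and the libinfo
# feature names 'OPENMP' and 'BLAS_OPEN'.
import mxnet as mx

features = mx.runtime.Features()
print(features.is_enabled('OPENMP'))     # True when built with USE_OPENMP=1
print(features.is_enabled('BLAS_OPEN'))  # True when built with USE_BLAS=openblas
```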

benchmark/opperf/README.md

Lines changed: 1 addition & 1 deletion

````diff
@@ -75,7 +75,7 @@ For example, you want to run benchmarks for all NDArray Broadcast Binary Operato

 ```
 #!/usr/bin/python
-from benchmark.opperf.tensor_operations.binary_broadcast_operators import run_mx_binary_broadcast_operators_benchmarks
+from benchmark.opperf.nd_operations.binary_broadcast_operators import run_mx_binary_broadcast_operators_benchmarks

 # Run all Binary Broadcast operations benchmarks with default input values
 print(run_mx_binary_broadcast_operators_benchmarks())
````

benchmark/opperf/nd_operations/README.md

Lines changed: 1 addition & 12 deletions

```diff
@@ -28,17 +28,14 @@
 6. reshape
 7. one_hot
 8. linalg_potri
-9. mp_sgd_update
 10. multi_sgd_update
-11. signum_update
 12. Convolution_v1
 13. repeat
 14. Custom
 15. softmax_cross_entropy
 16. SwapAxis
 17. norm
 18. Softmax
-19. rmspropalex_update
 20. fill_element_0index
 21. cast
 22. UpSampling
@@ -52,32 +49,27 @@
 30. Activation
 31. LinearRegressionOutput
 32. Pooling_v1
-33. ftml_update
 34. Crop
 35. ElementWiseSum
 36. diag
 37. Reshape
 38. Pad
 39. linalg_gemm2
 40. crop
-41. rmsprop_update
 43. RNN
 45. SoftmaxOutput
 46. linalg_extractdiag
-47. sgd_mom_update
 48. SequenceLast
 51. SequenceReverse
 53. SVMOutput
 54. linalg_trsm
 55. where
 56. SoftmaxActivation
-57. signsgd_update
 58. slice
 59. linalg_gelqf
 60. softmin
 61. linalg_gemm
 62. BilinearSampler
-63. mp_sgd_mom_update
 64. choose_element_0index
 65. tile
 67. gather_nd
@@ -110,7 +102,6 @@
 98. linalg_syrk
 99. squeeze
 101. ROIPooling
-102. ftrl_update
 103. SliceChannel
 104. slice_like
 106. linalg_maketrian
@@ -127,6 +118,4 @@
 119. normal
 120. take
 121. MakeLoss
-122. sgd_update
-123. adam_update
-124. concat
+124. concat
```

benchmark/opperf/nd_operations/nn_optimizer_operators.py (new file)

Lines changed: 64 additions & 0 deletions

```diff
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import mxnet as mx
+from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks
+from benchmark.opperf.utils.op_registry_utils import get_all_optimizer_operators
+
+"""Performance benchmark tests for MXNet Neural Network Optimizer Update Operators.
+
+1. Stochastic Gradient Descent (SGD)
+    1.1 mp_sgd_update
+    1.2 sgd_mom_update
+    1.3 signsgd_update
+    1.4 mp_sgd_mom_update
+    1.5 sgd_update
+2. signum_update
+3. rmspropalex_update
+4. ftml_update
+5. rmsprop_update
+6. ftrl_update
+7. adam_update
+"""
+
+
+def run_optimizer_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
+    """Runs benchmarks with the given context and precision (dtype) for all the neural network
+    optimizer update operators in MXNet.
+
+    Parameters
+    ----------
+    ctx: mx.ctx
+        Context to run benchmarks
+    dtype: str, default 'float32'
+        Precision to use for benchmarks
+    warmup: int, default 25
+        Number of times to run for warmup
+    runs: int, default 100
+        Number of runs to capture benchmark results
+
+    Returns
+    -------
+    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.
+
+    """
+    # Fetch all optimizer operators
+    mx_optimizer_ops = get_all_optimizer_operators()
+
+    # Run benchmarks
+    mx_optimizer_op_results = run_op_benchmarks(mx_optimizer_ops, dtype, ctx, warmup, runs)
+    return mx_optimizer_op_results
```
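The new entry point can also be invoked on its own; a minimal sketch, assuming it is launched from the repository root so the `benchmark.opperf` package is importable (the reduced `warmup`/`runs` values are just for a quick smoke test, the defaults being 25/100):

```python
#!/usr/bin/python
# Sketch: run only the optimizer update benchmarks added in this commit.
# Assumes execution from the incubator-mxnet repository root so that the
# `benchmark.opperf` package is on the Python path.
import mxnet as mx

from benchmark.opperf.nd_operations.nn_optimizer_operators import \
    run_optimizer_operators_benchmarks

results = run_optimizer_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
                                             warmup=5, runs=10)
print(results)
```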

benchmark/opperf/opperf.py

Lines changed: 3 additions & 0 deletions

```diff
@@ -39,6 +39,7 @@
 from benchmark.opperf.nd_operations.nn_conv_operators import run_pooling_operators_benchmarks, \
     run_convolution_operators_benchmarks, run_transpose_convolution_operators_benchmarks
 from benchmark.opperf.nd_operations.nn_basic_operators import run_nn_basic_operators_benchmarks
+from benchmark.opperf.nd_operations.nn_optimizer_operators import run_optimizer_operators_benchmarks
 from benchmark.opperf.nd_operations.array_rearrange import run_rearrange_operators_benchmarks

 from benchmark.opperf.utils.common_utils import merge_map_list, save_to_file
@@ -96,6 +97,8 @@ def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32'):
     # Run all Convolution operations benchmarks with default input values
     mxnet_operator_benchmark_results.append(run_convolution_operators_benchmarks(ctx=ctx, dtype=dtype))

+    # Run all Optimizer operations benchmarks with default input values
+    mxnet_operator_benchmark_results.append(run_optimizer_operators_benchmarks(ctx=ctx, dtype=dtype))
     # Run all Transpose Convolution operations benchmarks with default input values
     mxnet_operator_benchmark_results.append(run_transpose_convolution_operators_benchmarks(ctx=ctx, dtype=dtype))

```
benchmark/opperf/rules/default_params.py

Lines changed: 56 additions & 2 deletions

```diff
@@ -34,6 +34,7 @@

 # For operators like - random_uniform, random_normal etc..
 DEFAULT_SHAPE = [(1024, 1024), (10000, 1), (10000, 100)]
+DEFAULT_SAMPLE = [(2,)]
 DEFAULT_LOW = [0]
 DEFAULT_HIGH = [5]
 DEFAULT_K = [1]
@@ -62,6 +63,31 @@
 # NOTE: Data used is DEFAULT_DATA
 DEFAULT_AXIS = [0]

+# For optimizer operators
+DEFAULT_WEIGHT = [(1024, 1024), (10000, 1), (10000, 100)]
+DEFAULT_GRAD = [(1024, 1024), (10000, 1), (10000, 100)]
+DEFAULT_MOM = [(1024, 1024), (10000, 1), (10000, 100)]
+DEFAULT_MEAN = [(1024, 1024), (10000, 1), (10000, 100)]
+DEFAULT_VAR = [(1024, 1024), (10000, 1), (10000, 100)]
+DEFAULT_N = [(1024, 1024), (10000, 1), (10000, 100)]
+DEFAULT_D = [(1024, 1024), (10000, 1), (10000, 100)]
+DEFAULT_V = [(1024, 1024), (10000, 1), (10000, 100)]
+DEFAULT_Z = [(1024, 1024), (10000, 1), (10000, 100)]
+DEFAULT_G = [(1024, 1024), (10000, 1), (10000, 100)]
+DEFAULT_DELTA = [(1024, 1024), (10000, 1), (10000, 100)]
+DEFAULT_LRS = [(0.1,0.1)]
+DEFAULT_LR = [0.1,0.5,0.9]
+DEFAULT_GAMMA_1 = [0.1,0.5,0.9]
+DEFAULT_GAMMA_2 = [0.1,0.5,0.9]
+DEFAULT_EPSILON = [1e-08]
+DEFAULT_BETA_1 = [0.1,0.5,0.9]
+DEFAULT_BETA_2 = [0.1,0.5,0.9]
+DEFAULT_T = [1,5]
+DEFAULT_RESCALE_GRAD = [0.4, 0.77]
+DEFAULT_CLIP_GRADIENT = [-1.0,0.8]
+DEFAULT_CLIP_WEIGHTS = [-1.0,0.8]
+DEFAULT_LAZY_UPDATE = [0,1]
+
 # For rearrange operators
 # NOTE: Data needs to be a 4D tensor for operators like space_to_depth and depth_to_space
 # Hence below we append 4d to mark the difference.
@@ -71,8 +97,10 @@
 DEFAULT_DIM_2 = [1, 2, 3, 0]
 DEFAULT_BLOCK_SIZE = [2, 5]

+
 # Default Inputs. MXNet Op Param Name to Default Input mapping
 DEFAULTS_INPUTS = {"data": DEFAULT_DATA,
+                   "sample": DEFAULT_SAMPLE,
                    "lhs": DEFAULT_LHS,
                    "rhs": DEFAULT_RHS,
                    "shape": DEFAULT_SHAPE,
@@ -91,16 +119,42 @@
                    "p_nd": DEFAULT_P_ND,
                    "axis_shape": DEFAULT_AXIS_SHAPE,
                    "axis": DEFAULT_AXIS,
+                   "weight" : DEFAULT_WEIGHT,
+                   "weight32" : DEFAULT_WEIGHT,
+                   "grad" : DEFAULT_GRAD,
+                   "mean" : DEFAULT_MEAN,
+                   "var" : DEFAULT_VAR,
+                   "mom" : DEFAULT_MOM,
+                   "n" : DEFAULT_N,
+                   "d" : DEFAULT_D,
+                   "v" : DEFAULT_V,
+                   "z" : DEFAULT_Z,
+                   "g" : DEFAULT_G,
+                   "delta" : DEFAULT_DELTA,
+                   "lr" : DEFAULT_LR,
+                   "lrs" : DEFAULT_LRS,
+                   "wds" : DEFAULT_LRS,
+                   "gamma1" : DEFAULT_GAMMA_1,
+                   "gamma2" : DEFAULT_GAMMA_2,
+                   "epsilon" : DEFAULT_EPSILON,
+                   "beta1" : DEFAULT_BETA_1,
+                   "beta2" : DEFAULT_BETA_2,
+                   "t" : DEFAULT_T,
+                   "rescale_grad" : DEFAULT_RESCALE_GRAD,
+                   "clip_grad" : DEFAULT_CLIP_GRADIENT,
+                   "lazy_update" : DEFAULT_LAZY_UPDATE,
                    "data_4d": DEFAULT_DATA_4d,
                    "dim1": DEFAULT_DIM_1,
                    "dim2": DEFAULT_DIM_2,
                    "block_size": DEFAULT_BLOCK_SIZE}

+
 # These are names of MXNet operator parameters that is of type NDArray.
 # We maintain this list to automatically recognize these parameters are to be
 # given as NDArray and translate users inputs such as a shape tuple, Numpy Array or
 # a list to MXNet NDArray. This is just a convenience added so benchmark utility users
 # can just say shape of the tensor, and we automatically create Tensors.
-PARAMS_OF_TYPE_NDARRAY = ["lhs", "rhs", "data", "base", "exp",
+PARAMS_OF_TYPE_NDARRAY = ["lhs", "rhs", "data", "base", "exp", "sample",
                           "mu", "sigma", "lam", "alpha", "beta", "gamma", "k", "p",
-                          "low", "high", "weight", "bias", "moving_mean", "moving_var"]
+                          "low", "high", "weight", "bias", "moving_mean", "moving_var",
+                          "weight", "weight32", "grad", "mean", "var", "mom", "n", "d", "v", "z", "g", "delta"]
```

benchmark/opperf/utils/op_registry_utils.py

Lines changed: 21 additions & 0 deletions

```diff
@@ -244,6 +244,27 @@ def get_all_reduction_operators():
     return reduction_mx_operators


+def get_all_optimizer_operators():
+    """Gets all Optimizer operators registered with MXNet.
+
+    Returns
+    -------
+    {"operator_name": {"has_backward", "nd_op_handle", "params"}}
+    """
+    optimizer_ops = ['mp_sgd_update', 'signum_update', 'rmspropalex_update', 'ftml_update', 'rmsprop_update',
+                     'sgd_mom_update', 'signsgd_update', 'mp_sgd_mom_update', 'ftrl_update', 'sgd_update',
+                     'adam_update']
+
+    # Get all mxnet operators
+    mx_operators = _get_all_mxnet_operators()
+
+    # Filter for Optimizer operators
+    optimizer_mx_operators = {}
+    for op_name, op_params in mx_operators.items():
+        if op_name in optimizer_ops and op_name not in unique_ops:
+            optimizer_mx_operators[op_name] = mx_operators[op_name]
+    return optimizer_mx_operators
+
 def get_all_sorting_searching_operators():
     """Gets all Sorting and Searching operators registered with MXNet.

```
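A quick sanity check of the new helper; a sketch, again assuming execution from the repository root:

```python
# Sketch: list the optimizer update operators the new helper discovers.
from benchmark.opperf.utils.op_registry_utils import get_all_optimizer_operators

optimizer_ops = get_all_optimizer_operators()
# Values follow the layout documented above: has_backward, nd_op_handle, params.
for op_name, op_metadata in optimizer_ops.items():
    print(op_name, sorted(op_metadata.keys()))
```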

cpp-package/example/inference/README.md

Lines changed: 0 additions & 3 deletions

````diff
@@ -41,7 +41,6 @@ The following performance numbers are collected via using C++ inference API on A
 ```
 export KMP_AFFINITY=granularity=fine,noduplicates,compact,1,0
 export OMP_NUM_THREADS=$(vCPUs/2)
-export MXNET_SUBGRAPH_BACKEND=MKLDNN
 export MXNET_ENGINE_TYPE=NaiveEngine
 ```
 Also users are recommended to use ```numactl``` or ```taskset``` to bind a running process to the specified cores.
@@ -87,8 +86,6 @@ Follow the below steps to do inference with more models.

 The below command lines show how to run inference with FP32/INT8 resnet50_v1 model. Because the C++ inference script provides the almost same command line as this [Python script](https://github.com/apache/incubator-mxnet/blob/master/example/quantization/imagenet_inference.py) and then users can easily go from Python to C++.
 ```
-# set MKLDNN as subgraph backend
-export MXNET_SUBGRAPH_BACKEND=MKLDNN

 # FP32 inference
 ./imagenet_inference --symbol_file "./model/resnet50_v1-symbol.json" --params_file "./model/resnet50_v1-0000.params" --dataset "./data/val_256_q90.rec" --rgb_mean "123.68 116.779 103.939" --rgb_std "58.393 57.12 57.375" --batch_size 64 --num_skipped_batches 50 --num_inference_batches 500
````

docs/_static/js/options.js

Lines changed: 1 addition & 1 deletion

```diff
@@ -19,7 +19,7 @@
 */

 /* Installation page display functions for install selector */
-var versionSelect = defaultVersion = 'v1.4.1';
+var versionSelect = defaultVersion = 'v1.5.0';
 var platformSelect = 'Linux';
 var languageSelect = 'Python';
 var processorSelect = 'CPU';
```

docs/_static/mxnet-theme/index.html

Lines changed: 3 additions & 3 deletions

```diff
@@ -23,9 +23,9 @@
 <div class="container">
   <div class="row">
     <div class="col-lg-4 col-sm-12">
-      <h3>MXNet 1.4.1 Released</h3>
-      <p>This patch release features bug fixes and performance improvements.</p>
-      <a href="https://github.com/apache/incubator-mxnet/releases/tag/1.4.1">Learn More</a>
+      <h3>MXNet 1.5.0 Released</h3>
+      <p>This release features Automatic Mixed Precision, MKL-DNN updates, CUDA10.1 support and more.</p>
+      <a href="https://github.com/apache/incubator-mxnet/releases/tag/1.5.0">Learn More</a>
     </div>
     <div class="col-lg-4 col-sm-12">
       <h3>A 60-minute Gluon Crash Course</h3>
```

docs/api/scala/symbol.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -41,7 +41,7 @@ The following example configures a two-layer neural network.
 val data = Symbol.Variable("data")
 val fc1 = Symbol.api.FullyConnected(Some(data), num_hidden = 128, name = "fc1")
 val act1 = Symbol.api.Activation(Some(fc1), "relu", "relu1")
-val fc2 = Symbol.api.FullyConnected(some(act1), num_hidden = 64, name = "fc2")
+val fc2 = Symbol.api.FullyConnected(Some(act1), num_hidden = 64, name = "fc2")
 val net = Symbol.api.SoftmaxOutput(Some(fc2), name = "out")
 :type net
 // org.apache.mxnet.Symbol
```

docs/faq/env_var.md

Lines changed: 2 additions & 1 deletion

```diff
@@ -307,9 +307,10 @@ If ctypes is used, it must be `mxnet._ctypes.ndarray.NDArrayBase`.
   - This variable controls how many CuDNN dropout state resources to create for each GPU context for use in operator.

 * MXNET_SUBGRAPH_BACKEND
-  - Values: String ```(default="")```
+  - Values: String ```(default="MKLDNN")``` if MKLDNN is available, otherwise ```(default="")```
   - This variable controls the subgraph partitioning in MXNet.
   - This variable is used to perform MKL-DNN FP32 operator fusion and quantization. Please refer to the [MKL-DNN operator list](../tutorials/mkldnn/operator_list.md) for how this variable is used and the list of fusion passes.
+  - Set ```MXNET_SUBGRAPH_BACKEND=NONE``` to disable the subgraph backend.

 * MXNET_SAFE_ACCUMULATION
   - Values: 0(false) or 1(true) ```(default=0)```
```

docs/install/download.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -21,6 +21,7 @@ These source archives are generated from tagged releases. Updates and patches wi

 | Version | Source | PGP | SHA |
 |---------|--------|-----|-----|
+| 1.5.0 | [Download](https://apache.org/dist/incubator/mxnet/1.5.0/apache-mxnet-src-1.5.0-incubating.tar.gz) | [Download](https://apache.org/dist/incubator/mxnet/1.5.0/apache-mxnet-src-1.5.0-incubating.tar.gz.asc) | [Download](https://apache.org/dist/incubator/mxnet/1.5.0/apache-mxnet-src-1.5.0-incubating.tar.gz.sha512) |
 | 1.4.1 | [Download](https://www.apache.org/dyn/closer.cgi/incubator/mxnet/1.4.1/apache-mxnet-src-1.4.1-incubating.tar.gz) | [Download](https://apache.org/dist/incubator/mxnet/1.4.1/apache-mxnet-src-1.4.1-incubating.tar.gz.asc) | [Download](https://apache.org/dist/incubator/mxnet/1.4.1/apache-mxnet-src-1.4.1-incubating.tar.gz.sha512) |
 | 1.4.0 | [Download](https://www.apache.org/dyn/closer.cgi/incubator/mxnet/1.4.0/apache-mxnet-src-1.4.0-incubating.tar.gz) | [Download](https://apache.org/dist/incubator/mxnet/1.4.0/apache-mxnet-src-1.4.0-incubating.tar.gz.asc) | [Download](https://apache.org/dist/incubator/mxnet/1.4.0/apache-mxnet-src-1.4.0-incubating.tar.gz.sha512) |
 | 1.3.1 | [Download](https://www.apache.org/dyn/closer.cgi/incubator/mxnet/1.3.1/apache-mxnet-src-1.3.1-incubating.tar.gz) | [Download](https://apache.org/dist/incubator/mxnet/1.3.1/apache-mxnet-src-1.3.1-incubating.tar.gz.asc) | [Download](https://apache.org/dist/incubator/mxnet/1.3.1/apache-mxnet-src-1.3.1-incubating.tar.gz.sha512) |
```
