
Commit fd63da9

samskalicky committed
[WIP] MXNet Extensions enhancements (apache#17885)
* add debug prints to debug error in CI
* add debug prints to debug error in CI
* remove prints
* initial commit
* enabled calling create for selector
* connected selector to call external class
* added code to remove temp graph attrs
* fixed build issues
* changed shape inference to use different attr names
* fixed selector class
* cleaned up APIs
* fixed sanity
* updated build for extensions
* sanity fix
* refactored MXLoadLib into separate functions
* undo rebase
* finished merge
* enabled verbose in library loading
* fixed example
* added passing args/aux down to graph pass
* added creating new args/aux for graph passes
* fixed return args/aux
* fixed sanity
* whitespace
* fixed lint
* updated perl API, README, added pass_lib to cmake build flow
* fixed mistake with relu example lib
* fixed perl syntax
* addressed comments
* addressed more comments
* fixed compile issues

Co-authored-by: Ubuntu <[email protected]>
Co-authored-by: Ubuntu <[email protected]>
1 parent: a5744be · commit: fd63da9

26 files changed (+2246 / -829 lines)

CMakeLists.txt

Lines changed: 17 additions & 1 deletion
@@ -733,18 +733,34 @@ endif()
 
 # extension libraries (custom operators, custom subgraphs) are built by default
 add_library(customop_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/gemm_lib.cc)
+add_library(transposecsr_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposecsr_lib.cc)
+add_library(transposerowsp_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposerowsp_lib.cc)
 add_library(subgraph_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_subgraph/subgraph_lib.cc)
+add_library(pass_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_pass/pass_lib.cc)
 target_include_directories(customop_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
+target_include_directories(transposecsr_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
+target_include_directories(transposerowsp_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
 target_include_directories(subgraph_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
+target_include_directories(pass_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
 if(USE_CUDA)
   add_library(customop_gpu_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/relu_lib.cu)
   target_include_directories(customop_gpu_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
 endif()
-if(MSVC)
+if(UNIX)
+  if (USE_CUDA)
+    target_compile_options(customop_gpu_lib PUBLIC -shared)
+  endif()
+elseif(MSVC)
   target_compile_options(customop_lib PUBLIC /LD)
+  target_compile_options(transposecsr_lib PUBLIC /LD)
+  target_compile_options(transposerowsp_lib PUBLIC /LD)
   target_compile_options(subgraph_lib PUBLIC /LD)
+  target_compile_options(pass_lib PUBLIC /LD)
   set_target_properties(customop_lib PROPERTIES PREFIX "lib")
+  set_target_properties(transposecsr_lib PROPERTIES PREFIX "lib")
+  set_target_properties(transposerowsp_lib PROPERTIES PREFIX "lib")
   set_target_properties(subgraph_lib PROPERTIES PREFIX "lib")
+  set_target_properties(pass_lib PROPERTIES PREFIX "lib")
   if(USE_CUDA)
     target_compile_options(customop_gpu_lib PUBLIC "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-fPIC>")
     set_target_properties(customop_gpu_lib PROPERTIES PREFIX "lib")

Makefile

Lines changed: 16 additions & 1 deletion
@@ -667,7 +667,7 @@ pylint:
 	python3 -m pylint --rcfile=$(ROOTDIR)/ci/other/pylintrc --ignore-patterns=".*\.so$$,.*\.dll$$,.*\.dylib$$" python/mxnet
 
 # MXNet extension dynamically loading libraries
-EXT_LIBS = build/libcustomop_lib.so build/libsubgraph_lib.so
+EXT_LIBS = build/libcustomop_lib.so build/libtransposecsr_lib.so build/libtransposerowsp_lib.so build/libsubgraph_lib.so build/libpass_lib.so
 ifeq ($(USE_CUDA), 1)
 	EXT_LIBS += build/libcustomop_gpu_lib.so
 endif

@@ -682,6 +682,21 @@ build/libcustomop_gpu_lib.so:
 build/libsubgraph_lib.so:
 	@mkdir -p $(@D)
 	$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_subgraph/subgraph_lib.cc -o $@ -I include/mxnet
+build/libtransposecsr_lib.so:
+	@mkdir -p $(@D)
+	$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_custom_op/transposecsr_lib.cc -o $@ -I include/mxnet
+build/libtransposerowsp_lib.so:
+	@mkdir -p $(@D)
+	$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_custom_op/transposerowsp_lib.cc -o $@ -I include/mxnet
+build/libcustomop_gpu_lib.so:
+	@mkdir -p $(@D)
+	$(NVCC) -shared -std=c++11 -Xcompiler -fPIC example/extensions/lib_custom_op/relu_lib.cu -o $@ -I include/mxnet
+build/libsubgraph_lib.so:
+	@mkdir -p $(@D)
+	$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_subgraph/subgraph_lib.cc -o $@ -I include/mxnet
+build/libpass_lib.so:
+	@mkdir -p $(@D)
+	$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_pass/pass_lib.cc -o $@ -I include/mxnet
 
 # Cython build
 cython:

example/extensions/lib_api/init_lib.cc

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
 #include "lib_api.h"
 
 MXReturnValue initialize(int version) {
-  if (version >= 10400) {
+  if (version >= 10700) {
     std::cout << "MXNet version " << version << " supported" << std::endl;
     return MX_SUCCESS;
   } else {
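The version gate moves from 10400 (MXNet 1.4.0) to 10700 (MXNet 1.7.0), since the refactored extension APIs only exist in 1.7+. For context, a complete `initialize` entry point would look roughly like the sketch below; the `else` branch here is an assumption that simply mirrors the success path and returns `MX_FAIL`:

```c++
#include <iostream>
#include "lib_api.h"

// Called once by MXNet when the library is dynamically loaded;
// 'version' is the version of the MXNet binary doing the loading.
MXReturnValue initialize(int version) {
  if (version >= 10700) {
    std::cout << "MXNet version " << version << " supported" << std::endl;
    return MX_SUCCESS;
  } else {
    // assumed: reject loading on older MXNet builds
    std::cout << "MXNet version " << version << " not supported" << std::endl;
    return MX_FAIL;
  }
}
```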

example/extensions/lib_api/test_loading.py

Lines changed: 10 additions & 0 deletions
@@ -25,9 +25,19 @@
 import mxnet as mx
 import os
 
+# test loading library
 if (os.name=='posix'):
     path = os.path.abspath('libinit_lib.so')
     mx.library.load(path)
 elif (os.name=='nt'):
     path = os.path.abspath('libinit_lib.dll')
     mx.library.load(path)
+
+# test loading library with verbose=False
+if (os.name=='posix'):
+    path = os.path.abspath('libinit_lib.so')
+    mx.library.load(path, False)
+elif (os.name=='nt'):
+    path = os.path.abspath('libinit_lib.dll')
+    mx.library.load(path, False)
+

example/extensions/lib_custom_op/README.md

Lines changed: 34 additions & 23 deletions
@@ -22,15 +22,13 @@ C++ Custom Operator Example and Tutorial
 
 Adding new operators in MXNet requires understanding of MXNet backend operator registration and recompiling of MXNet with all its dependencies. Users can use the old Python custom operator to add new operators, but it is slow, complicated and has poor adoption rate. So our approach for adding custom operators is to enable dynamic loading of C++ custom operators compiled in external libraries at runtime.
 
-Custom operators (CustomOp) enable users to write new operators without compiling against all of MXNet header files and dependencies. When a library containing custom operators is loaded dynamically, the operators found in the library will be re-registered in MXNet so that users can call those operators natively just like other built-in operators.
+Custom operators (CustomOp) enable users to write new operators without compiling against all of MXNet header files and dependencies. When a library containing custom operators is loaded dynamically, the operators found in the library will be registered in MXNet so that users can call those operators natively just like other built-in operators.
 
 ## Getting Started
 
 ### Have MXNet Ready
 
-Custom Operator support was merged (#15921, #17270) and is not available in versions of MXNet prior to v1.7.0.
-To access the feature now, please install MXNet by compiling from source using master or using the previously mentioned commits, downloading one of the nightly builds, or from a release of MXNet 1.7.0+.
-For running the following example, it doesn’t matter if it is a CUDA, MKLDNN or plain MXNet build; the custom operator doesn’t interact with the execution of other native MXNet operators.
+To run the following example, the build type of MXNet doesn’t matter since the custom operator doesn’t interact with the execution of other native MXNet operators.
 Note that if you want to run GPU examples and write your custom operators running on GPU, you still need an MXNet CUDA build.
 
 ### Run An Example
@@ -117,8 +115,7 @@ There are several required building blocks for making a custom operator:
 
 ```c++
 MXReturnValue parseAttrs(
-    std::map<std::string,
-    std::string> attrs,
+    const std::unordered_map<std::string, std::string>& attrs,
     int* num_in,
     int* num_out)
 ```
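As a quick illustration of the new convention (attributes passed by const reference, outputs returned through pointers), a `parseAttrs` for a fixed-arity operator can be as small as the sketch below, modeled on gemm_lib.cc; the 2-in/1-out counts are illustrative only:

```c++
#include "lib_api.h"

// Sketch: declare a fixed-arity operator (2 inputs, 1 output).
MXReturnValue parseAttrs(const std::unordered_map<std::string, std::string>& attrs,
                         int* num_in, int* num_out) {
  *num_in = 2;   // e.g. the two matrices of a GEMM-style op
  *num_out = 1;
  return MX_SUCCESS;
}
```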
@@ -129,30 +126,30 @@ There are several required building blocks for making a custom operator:
 
 ```c++
 MXReturnValue inferType(
-    std::map<std::string, std::string> attrs,
-    std::vector<int> &intypes,
-    std::vector<int> &outtypes)
+    const std::unordered_map<std::string, std::string>& attrs,
+    std::vector<int>* intypes,
+    std::vector<int>* outtypes)
 ```
 
 * [inferShape](./gemm_lib.cc#L143):
     * This function specifies how the custom operator infers output tensor shape using input shape.
 
 ```c++
 MXReturnValue inferShape(
-    std::map<std::string, std::string> attrs,
-    std::vector<std::vector<unsigned int>> &inshapes,
-    std::vector<std::vector<unsigned int>> &outshapes)
+    const std::unordered_map<std::string, std::string>& attrs,
+    std::vector<std::vector<unsigned int>>* inshapes,
+    std::vector<std::vector<unsigned int>>* outshapes)
 ```
 
 * [forward](./gemm_lib.cc#L56):
     * This function specifies the computation of the forward pass of the operator.
 
 ```c++
 MXReturnValue forward(
-    std::map<std::string, std::string> attrs,
-    std::vector<MXTensor> inputs,
-    std::vector<MXTensor> outputs,
-    OpResource res)
+    const std::unordered_map<std::string, std::string>& attrs,
+    std::vector<MXTensor>* inputs,
+    std::vector<MXTensor>* outputs,
+    const OpResource& res)
 ```
 
 Also there are some optional functions you can specify:
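For reference, a minimal CPU `forward` under the new pointer-based signatures might look like the following sketch (an element-wise identity in the style of the example libraries, assuming the `data<T>()` and `size()` accessors that `MXTensor` exposes in lib_api.h; this is not code from this diff):

```c++
#include "lib_api.h"

// Sketch: element-wise copy forward using the pointer-based API.
MXReturnValue forward(const std::unordered_map<std::string, std::string>& attrs,
                      std::vector<MXTensor>* inputs,
                      std::vector<MXTensor>* outputs,
                      const OpResource& res) {
  float* in_data  = inputs->at(0).data<float>();
  float* out_data = outputs->at(0).data<float>();
  for (int64_t i = 0; i < inputs->at(0).size(); i++)
    out_data[i] = in_data[i];  // identity; a real op computes here
  return MX_SUCCESS;
}
```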
@@ -162,19 +159,30 @@ Also there are some optional functions you can specify:
 
 ```c++
 MXReturnValue backward(
-    std::map<std::string, std::string> attrs,
-    std::vector<MXTensor> inputs,
-    std::vector<MXTensor> outputs,
-    OpResource res)
+    const std::unordered_map<std::string, std::string>& attrs,
+    std::vector<MXTensor>* inputs,
+    std::vector<MXTensor>* outputs,
+    const OpResource& res)
+```
+
+* [inferSType](./transposecsr_lib.cc#L168) - Storage Type Inference:
+    * This function specifies how the custom operator infers storage types for inputs and outputs.
+
+```c++
+MXReturnValue inferSType(
+    const std::unordered_map<std::string, std::string>& attrs,
+    std::vector<int>* instypes,
+    std::vector<int>* outstypes)
 ```
 
 * [mutateInputs](./gemm_lib.cc#L214) - Specify mutable input:
     * This function allows you to mark some inputs to be mutable inputs. It is useful when using aux parameters for BatchNorm-like operators.
 
 ```c++
 MXReturnValue mutateInputs(
-    std::map<std::string, std::string> attrs,
-    std::vector<int> &input_indices)
+    const std::unordered_map<std::string, std::string>& attrs,
+    std::vector<int>* input_indices)
 ```
 
 After specifying those functions, register the custom operator with MXNet:
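The registration block itself is unchanged by this hunk; as an orientation sketch only (assuming the `REGISTER_OP` macro and setter chain from lib_api.h, with `my_op` as a placeholder name and the `"cpu"` context string used by the GPU-capable registration style), it follows this pattern:

```c++
#include "lib_api.h"

// Sketch: wire the functions above into an operator named 'my_op'
// (name and the exact setter list are illustrative assumptions).
REGISTER_OP(my_op)
.setParseAttrs(parseAttrs)
.setInferType(inferType)
.setInferShape(inferShape)
.setForward(forward, "cpu");
```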
@@ -200,6 +208,9 @@ If the number of input and output tensors are fixed, you can use hard-coded numb
 * **inferType**: This function takes three arguments. The 1st argument is the attributes (same as above). The 2nd argument is a list of input data types corresponding to the input tensors. The 3rd argument is the placeholder for output tensor data types you need to assign.
 For example, if this operator has one input and one output, and the data type doesn’t change, then you can do `outtypes[0] = intypes[0]` to populate the data type.
 
+* **inferSType**: This function takes three arguments. The 1st argument is the attributes (same as above). The 2nd argument is a list of input storage types corresponding to the input tensors. The 3rd argument is the placeholder for output storage types you need to assign.
+For example, if this operator has one input and one output, and the storage type doesn’t change, then you can do `outstypes[0] = instypes[0]` to populate the storage type.
+
 * **inferShape**: This function is similar to the `inferType` function, except it is used for populating the output data shapes. You need to figure out the shape of each output tensor for this computation.
 For example, if the inputs are images with shape (224,224,3) and you write a padding operator to make 10px borders for the images, then your output shape will be (234,234,3).
 
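To make the storage-type rule concrete, a minimal pass-through `inferSType` might look like the sketch below (assuming the storage-type enum that lib_api.h uses for these integer codes, e.g. dense vs. CSR; dense stays dense, sparse stays sparse):

```c++
#include "lib_api.h"

// Sketch: output storage type mirrors the input storage type.
MXReturnValue inferSType(const std::unordered_map<std::string, std::string>& attrs,
                         std::vector<int>* instypes,
                         std::vector<int>* outstypes) {
  outstypes->at(0) = instypes->at(0);  // one input, one output assumed
  return MX_SUCCESS;
}
```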
@@ -285,7 +296,7 @@ As a result, you don’t need to call `cudaMemcpy` to move the tensor data to th
 }
 ```
 
-Note that the `cuda_stream` object used for launching kernels is passed from MXNet backend via `OpResource` object. See below for details of `Operator Resource`.
+Note that the `cuda_stream` object used for launching kernels is passed from the MXNet backend via the `OpResource` object. See below for details of `Operator Resource`. If you plan to create a custom GPU operator, you need to compile the `lib_api.h` header file with `nvcc` to enable the GPU support in the APIs.
 Also, `in_data` and `out_data` are pointers to the tensor data allocated on the GPU, so you can pass them directly to your CUDA kernel.
 
 At this point all the attribute functions for each operator (`parseAttrs`, `inferShape`, etc.) run on the CPU, including the `forwardGPU` function. The only part that will actually run on the GPU is the launched CUDA kernel function.
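For orientation, a `forwardGPU` along the lines of relu_lib.cu might look like the sketch below, compiled with `nvcc`. The kernel name, body, and launch geometry are illustrative assumptions, and `res.get_cuda_stream()` is assumed to be the `OpResource` accessor for the stream described above:

```c++
#include "lib_api.h"

// Illustrative CUDA kernel (an example, not code from this diff).
__global__ void my_relu_kernel(float* out, float* in, int64_t N) {
  int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < N) out[i] = in[i] > 0 ? in[i] : 0;
}

MXReturnValue forwardGPU(const std::unordered_map<std::string, std::string>& attrs,
                         std::vector<MXTensor>* inputs,
                         std::vector<MXTensor>* outputs,
                         const OpResource& res) {
  float* in_data  = inputs->at(0).data<float>();
  float* out_data = outputs->at(0).data<float>();
  int64_t N = inputs->at(0).size();
  // the backend hands us its CUDA stream through OpResource;
  // launching on it keeps the kernel ordered with MXNet's own work
  mx_stream_t cuda_stream = res.get_cuda_stream();
  int threads = 256;
  int blocks  = (int)((N + threads - 1) / threads);
  my_relu_kernel<<<blocks, threads, 0, cuda_stream>>>(out_data, in_data, N);
  return MX_SUCCESS;
}
```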
