
Commit fd63da9

samskalicky committed
[WIP] MXNet Extensions enhancements (apache#17885)
* add debug prints to debug error in CI
* add debug prints to debug error in CI
* remove prints
* initial commit
* enabled calling create for selector
* connected selector to call external class
* added code to remove temp graph attrs
* fixed build issues
* changed shape inference to use different attr names
* fixed selector class
* cleaned up APIs
* fixed sanity
* updated build for extensions
* sanity fix
* refactored MXLoadLib into separate functions
* undo rebase
* finished merge
* enabled verbose in library loading
* fixed example
* added passing args/aux down to graph pass
* added creating new args/aux for graph passes
* fixed return args/aux
* fixed sanity
* whitespace
* fixed lint
* updated perl API, README, added pass_lib to cmake build flow
* fixed mistake with relu example lib
* fixed perl syntax
* addressed comments
* addressed more comments
* fixed compile issues

Co-authored-by: Ubuntu <[email protected]>
Co-authored-by: Ubuntu <[email protected]>
1 parent: a5744be · commit: fd63da9

26 files changed (+2246 / -829 lines)

CMakeLists.txt

Lines changed: 17 additions & 1 deletion
@@ -733,18 +733,34 @@ endif()
 
 # extension libraries (custom operators, custom subgraphs) are built by default
 add_library(customop_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/gemm_lib.cc)
+add_library(transposecsr_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposecsr_lib.cc)
+add_library(transposerowsp_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/transposerowsp_lib.cc)
 add_library(subgraph_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_subgraph/subgraph_lib.cc)
+add_library(pass_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_pass/pass_lib.cc)
 target_include_directories(customop_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
+target_include_directories(transposecsr_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
+target_include_directories(transposerowsp_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
 target_include_directories(subgraph_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
+target_include_directories(pass_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
 if(USE_CUDA)
   add_library(customop_gpu_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/extensions/lib_custom_op/relu_lib.cu)
   target_include_directories(customop_gpu_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
 endif()
-if(MSVC)
+if(UNIX)
+  if (USE_CUDA)
+    target_compile_options(customop_gpu_lib PUBLIC -shared)
+  endif()
+elseif(MSVC)
   target_compile_options(customop_lib PUBLIC /LD)
+  target_compile_options(transposecsr_lib PUBLIC /LD)
+  target_compile_options(transposerowsp_lib PUBLIC /LD)
   target_compile_options(subgraph_lib PUBLIC /LD)
+  target_compile_options(pass_lib PUBLIC /LD)
   set_target_properties(customop_lib PROPERTIES PREFIX "lib")
+  set_target_properties(transposecsr_lib PROPERTIES PREFIX "lib")
+  set_target_properties(transposerowsp_lib PROPERTIES PREFIX "lib")
   set_target_properties(subgraph_lib PROPERTIES PREFIX "lib")
+  set_target_properties(pass_lib PROPERTIES PREFIX "lib")
   if(USE_CUDA)
     target_compile_options(customop_gpu_lib PUBLIC "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-fPIC>")
     set_target_properties(customop_gpu_lib PROPERTIES PREFIX "lib")

Makefile

Lines changed: 16 additions & 1 deletion
@@ -667,7 +667,7 @@ pylint:
 	python3 -m pylint --rcfile=$(ROOTDIR)/ci/other/pylintrc --ignore-patterns=".*\.so$$,.*\.dll$$,.*\.dylib$$" python/mxnet
 
 # MXNet extension dynamically loading libraries
-EXT_LIBS = build/libcustomop_lib.so build/libsubgraph_lib.so
+EXT_LIBS = build/libcustomop_lib.so build/libtransposecsr_lib.so build/libtransposerowsp_lib.so build/libsubgraph_lib.so build/libpass_lib.so
 ifeq ($(USE_CUDA), 1)
 	EXT_LIBS += build/libcustomop_gpu_lib.so
 endif

@@ -682,6 +682,21 @@ build/libcustomop_gpu_lib.so:
 build/libsubgraph_lib.so:
 	@mkdir -p $(@D)
 	$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_subgraph/subgraph_lib.cc -o $@ -I include/mxnet
+build/libtransposecsr_lib.so:
+	@mkdir -p $(@D)
+	$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_custom_op/transposecsr_lib.cc -o $@ -I include/mxnet
+build/libtransposerowsp_lib.so:
+	@mkdir -p $(@D)
+	$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_custom_op/transposerowsp_lib.cc -o $@ -I include/mxnet
+build/libcustomop_gpu_lib.so:
+	@mkdir -p $(@D)
+	$(NVCC) -shared -std=c++11 -Xcompiler -fPIC example/extensions/lib_custom_op/relu_lib.cu -o $@ -I include/mxnet
+build/libsubgraph_lib.so:
+	@mkdir -p $(@D)
+	$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_subgraph/subgraph_lib.cc -o $@ -I include/mxnet
+build/libpass_lib.so:
+	@mkdir -p $(@D)
+	$(CXX) -shared -fPIC -std=c++11 example/extensions/lib_pass/pass_lib.cc -o $@ -I include/mxnet
 
 # Cython build
 cython:

example/extensions/lib_api/init_lib.cc

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
 #include "lib_api.h"
 
 MXReturnValue initialize(int version) {
-  if (version >= 10400) {
+  if (version >= 10700) {
     std::cout << "MXNet version " << version << " supported" << std::endl;
     return MX_SUCCESS;
   } else {
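The version gate moves from 10400 (MXNet 1.4.0) to 10700 (MXNet 1.7.0), since the refactored extension APIs only exist in 1.7+. For context, a complete `initialize` entry point would look roughly like the sketch below; the `else` branch here is an assumption that simply mirrors the success path and returns `MX_FAIL`:

```c++
#include <iostream>
#include "lib_api.h"

// Called once by MXNet when the library is dynamically loaded;
// 'version' is the version of the MXNet binary doing the loading.
MXReturnValue initialize(int version) {
  if (version >= 10700) {
    std::cout << "MXNet version " << version << " supported" << std::endl;
    return MX_SUCCESS;
  } else {
    // assumed: reject loading on older MXNet builds
    std::cout << "MXNet version " << version << " not supported" << std::endl;
    return MX_FAIL;
  }
}
```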

example/extensions/lib_api/test_loading.py

Lines changed: 10 additions & 0 deletions
@@ -25,9 +25,19 @@
 import mxnet as mx
 import os
 
+# test loading library
 if (os.name=='posix'):
     path = os.path.abspath('libinit_lib.so')
     mx.library.load(path)
 elif (os.name=='nt'):
     path = os.path.abspath('libinit_lib.dll')
     mx.library.load(path)
+
+# test loading library with verbose=False
+if (os.name=='posix'):
+    path = os.path.abspath('libinit_lib.so')
+    mx.library.load(path, False)
+elif (os.name=='nt'):
+    path = os.path.abspath('libinit_lib.dll')
+    mx.library.load(path, False)
+

example/extensions/lib_custom_op/README.md

Lines changed: 34 additions & 23 deletions
@@ -22,15 +22,13 @@ C++ Custom Operator Example and Tutorial
 
 Adding new operators in MXNet requires understanding of MXNet backend operator registration and recompiling of MXNet with all its dependencies. Users can use the old Python custom operator to add new operators, but it is slow, complicated and has poor adoption rate. So our approach for adding custom operators is to enable dynamic loading of C++ custom operators compiled in external libraries at runtime.
 
-Custom operators (CustomOp) enable users to write new operators without compiling against all of MXNet header files and dependencies. When a library containing custom operators is loaded dynamically, the operators found in the library will be re-registered in MXNet so that users can call those operators natively just like other built-in operators.
+Custom operators (CustomOp) enable users to write new operators without compiling against all of MXNet header files and dependencies. When a library containing custom operators is loaded dynamically, the operators found in the library will be registered in MXNet so that users can call those operators natively just like other built-in operators.
 
 ## Getting Started
 
 ### Have MXNet Ready
 
-Custom Operator support was merged (#15921, #17270) and is not available in versions of MXNet prior to v1.7.0.
-To access the feature now, please install MXNet by compiling from source using master or using the previously mentioned commits, downloading one of the nightly builds, or from a release of MXNet 1.7.0+.
-For running the following example, it doesn’t matter if it is a CUDA, MKLDNN or plain MXNet build; the custom operator doesn’t interact with the execution of other native MXNet operators.
+To run the following example, the build type of MXNet doesn’t matter since the custom operator doesn’t interact with the execution of other native MXNet operators.
 Note that if you want to run GPU examples and write your custom operators running on GPU, you still need an MXNet CUDA build.
 
 ### Run An Example
@@ -117,8 +115,7 @@ There are several required building blocks for making a custom operator:
 
 ```c++
 MXReturnValue parseAttrs(
-    std::map<std::string,
-    std::string> attrs,
+    const std::unordered_map<std::string, std::string>& attrs,
     int* num_in,
     int* num_out)
 ```
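As a quick illustration of the new convention (attributes passed by const reference, outputs returned through pointers), a `parseAttrs` for a fixed-arity operator can be as small as the sketch below, modeled on gemm_lib.cc; the 2-in/1-out counts are illustrative only:

```c++
#include "lib_api.h"

// Sketch: declare a fixed-arity operator (2 inputs, 1 output).
MXReturnValue parseAttrs(const std::unordered_map<std::string, std::string>& attrs,
                         int* num_in, int* num_out) {
  *num_in = 2;   // e.g. the two matrices of a GEMM-style op
  *num_out = 1;
  return MX_SUCCESS;
}
```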
@@ -129,30 +126,30 @@ There are several required building blocks for making a custom operator:
 
 ```c++
 MXReturnValue inferType(
-    std::map<std::string, std::string> attrs,
-    std::vector<int> &intypes,
-    std::vector<int> &outtypes)
+    const std::unordered_map<std::string, std::string>& attrs,
+    std::vector<int>* intypes,
+    std::vector<int>* outtypes)
 ```
 
 * [inferShape](./gemm_lib.cc#L143):
     * This function specifies how the custom operator infers output tensor shape using input shape.
 
 ```c++
 MXReturnValue inferShape(
-    std::map<std::string, std::string> attrs,
-    std::vector<std::vector<unsigned int>> &inshapes,
-    std::vector<std::vector<unsigned int>> &outshapes)
+    const std::unordered_map<std::string, std::string>& attrs,
+    std::vector<std::vector<unsigned int>>* inshapes,
+    std::vector<std::vector<unsigned int>>* outshapes)
 ```
 
 * [forward](./gemm_lib.cc#L56):
     * This function specifies the computation of the forward pass of the operator.
 
 ```c++
 MXReturnValue forward(
-    std::map<std::string, std::string> attrs,
-    std::vector<MXTensor> inputs,
-    std::vector<MXTensor> outputs,
-    OpResource res)
+    const std::unordered_map<std::string, std::string>& attrs,
+    std::vector<MXTensor>* inputs,
+    std::vector<MXTensor>* outputs,
+    const OpResource& res)
 ```
 
 Also there are some optional functions you can specify:
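For reference, a minimal CPU `forward` under the new pointer-based signatures might look like the following sketch (an element-wise identity in the style of the example libraries, assuming the `data<T>()` and `size()` accessors that `MXTensor` exposes in lib_api.h; this is not code from this diff):

```c++
#include "lib_api.h"

// Sketch: element-wise copy forward using the pointer-based API.
MXReturnValue forward(const std::unordered_map<std::string, std::string>& attrs,
                      std::vector<MXTensor>* inputs,
                      std::vector<MXTensor>* outputs,
                      const OpResource& res) {
  float* in_data  = inputs->at(0).data<float>();
  float* out_data = outputs->at(0).data<float>();
  for (int64_t i = 0; i < inputs->at(0).size(); i++)
    out_data[i] = in_data[i];  // identity; a real op computes here
  return MX_SUCCESS;
}
```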
@@ -162,19 +159,30 @@ Also there are some optional functions you can specify:
 
 ```c++
 MXReturnValue backward(
-    std::map<std::string, std::string> attrs,
-    std::vector<MXTensor> inputs,
-    std::vector<MXTensor> outputs,
-    OpResource res)
+    const std::unordered_map<std::string, std::string>& attrs,
+    std::vector<MXTensor>* inputs,
+    std::vector<MXTensor>* outputs,
+    const OpResource& res)
+```
+
+* [inferSType](./transposecsr_lib.cc#L168) - Storage Type Inference:
+    * This function specifies how the custom operator infers storage types for inputs and outputs.
+
+```c++
+MXReturnValue inferSType(
+    const std::unordered_map<std::string, std::string>& attrs,
+    std::vector<int>* instypes,
+    std::vector<int>* outstypes)
 ```
 
 * [mutateInputs](./gemm_lib.cc#L214) - Specify mutable input:
     * This function allows you to mark some inputs to be mutable inputs. It is useful when using aux parameters for BatchNorm-like operators.
 
 ```c++
 MXReturnValue mutateInputs(
-    std::map<std::string, std::string> attrs,
-    std::vector<int> &input_indices)
+    const std::unordered_map<std::string, std::string>& attrs,
+    std::vector<int>* input_indices)
 ```
 
 After specifying those functions, register the custom operator with MXNet:
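The registration block itself is unchanged by this hunk; as an orientation sketch only (assuming the `REGISTER_OP` macro and setter chain from lib_api.h, with `my_op` as a placeholder name and the `"cpu"` context string used by the GPU-capable registration style), it follows this pattern:

```c++
#include "lib_api.h"

// Sketch: wire the functions above into an operator named 'my_op'
// (name and the exact setter list are illustrative assumptions).
REGISTER_OP(my_op)
.setParseAttrs(parseAttrs)
.setInferType(inferType)
.setInferShape(inferShape)
.setForward(forward, "cpu");
```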
@@ -200,6 +208,9 @@ If the number of input and output tensors are fixed, you can use hard-coded numb
 * **inferType**: This function takes three arguments. The 1st argument is the attributes (same as above). The 2nd argument is a list of input data types corresponding to the input tensors. The 3rd argument is the placeholder for output tensor data types you need to assign.
 For example, if this operator has one input and one output, and the data type doesn’t change, then you can do `outtypes[0] = intypes[0]` to populate the data type.
 
+* **inferSType**: This function takes three arguments. The 1st argument is the attributes (same as above). The 2nd argument is a list of input storage types corresponding to the input tensors. The 3rd argument is the placeholder for output storage types you need to assign.
+For example, if this operator has one input and one output, and the storage type doesn’t change, then you can do `outstypes[0] = instypes[0]` to populate the storage type.
+
 * **inferShape**: This function is similar to the `inferType` function, except it is used for populating the output data shapes. You need to figure out the shape of each output tensor for this computation.
 For example, if the inputs are images with shape (224,224,3) and you write a padding operator to make 10px borders for the images, then your output shape will be (234,234,3).
 
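To make the storage-type rule concrete, a minimal pass-through `inferSType` might look like the sketch below (assuming the storage-type enum that lib_api.h uses for these integer codes, e.g. dense vs. CSR; dense stays dense, sparse stays sparse):

```c++
#include "lib_api.h"

// Sketch: output storage type mirrors the input storage type.
MXReturnValue inferSType(const std::unordered_map<std::string, std::string>& attrs,
                         std::vector<int>* instypes,
                         std::vector<int>* outstypes) {
  outstypes->at(0) = instypes->at(0);  // one input, one output assumed
  return MX_SUCCESS;
}
```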
@@ -285,7 +296,7 @@ As a result, you don’t need to call `cudaMemcpy` to move the tensor data to th
 }
 ```
 
-Note that the `cuda_stream` object used for launching kernels is passed from MXNet backend via `OpResource` object. See below for details of `Operator Resource`.
+Note that the `cuda_stream` object used for launching kernels is passed from the MXNet backend via the `OpResource` object. See below for details of `Operator Resource`. If you plan to create a custom GPU operator, you need to compile the `lib_api.h` header file with `nvcc` to enable the GPU support in the APIs.
 Also, `in_data` and `out_data` are pointers to the tensor data allocated on the GPU, so you can pass them directly to your CUDA kernel.
 
 At this point all the attribute functions for each operator (`parseAttrs`, `inferShape`, etc.) run on the CPU, including the `forwardGPU` function. The only part that will actually run on the GPU is the launched CUDA kernel function.
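For orientation, a `forwardGPU` along the lines of relu_lib.cu might look like the sketch below, compiled with `nvcc`. The kernel name, body, and launch geometry are illustrative assumptions, and `res.get_cuda_stream()` is assumed to be the `OpResource` accessor for the stream described above:

```c++
#include "lib_api.h"

// Illustrative CUDA kernel (an example, not code from this diff).
__global__ void my_relu_kernel(float* out, float* in, int64_t N) {
  int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < N) out[i] = in[i] > 0 ? in[i] : 0;
}

MXReturnValue forwardGPU(const std::unordered_map<std::string, std::string>& attrs,
                         std::vector<MXTensor>* inputs,
                         std::vector<MXTensor>* outputs,
                         const OpResource& res) {
  float* in_data  = inputs->at(0).data<float>();
  float* out_data = outputs->at(0).data<float>();
  int64_t N = inputs->at(0).size();
  // the backend hands us its CUDA stream through OpResource;
  // launching on it keeps the kernel ordered with MXNet's own work
  mx_stream_t cuda_stream = res.get_cuda_stream();
  int threads = 256;
  int blocks  = (int)((N + threads - 1) / threads);
  my_relu_kernel<<<blocks, threads, 0, cuda_stream>>>(out_data, in_data, N);
  return MX_SUCCESS;
}
```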
