Skip to content

Commit 02b6116

Browse files
larroyUbuntu
authored and committed
Refactor LibraryInitializer so it's thread safe. Fixes random sporadic concurrency crashes. (apache#15762)
* Refactor LibraryInitializer so it's thread safe. Fixes apache#13438 Fixes apache#14979 * Refactor around lib loading * Fix lint * CR * Add option to choose between OMP implementations * Fix bug * Fix from CR
1 parent c06b628 commit 02b6116

File tree

10 files changed

+359
-253
lines changed

10 files changed

+359
-253
lines changed

CMakeLists.txt

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ mxnet_option(USE_OLDCMAKECUDA "Build with old cmake cuda" OFF)
2424
mxnet_option(USE_NCCL "Use NVidia NCCL with CUDA" OFF)
2525
mxnet_option(USE_OPENCV "Build with OpenCV support" ON)
2626
mxnet_option(USE_OPENMP "Build with Openmp support" ON)
27+
mxnet_option(USE_OPENMP_BUNDLED_LLVM "Build with bundled llvm openmp from 3rdparty" OFF)
2728
mxnet_option(USE_CUDNN "Build with cudnn support" ON) # one could set CUDNN_ROOT for search path
2829
mxnet_option(USE_SSE "Build with x86 SSE instruction support" ON IF NOT ARM)
2930
mxnet_option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON
@@ -433,11 +434,11 @@ if(USE_OPENMP)
433434
find_package(OpenMP REQUIRED)
434435
# This should build on Windows, but there's some problem and I don't have a Windows box, so
435436
# could a Windows user please fix?
436-
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp/CMakeLists.txt
437-
AND SYSTEM_ARCHITECTURE STREQUAL "x86_64"
438-
AND NOT MSVC
439-
AND NOT CMAKE_CROSSCOMPILING)
440-
437+
if(USE_OPENMP_BUNDLED_LLVM AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp/CMakeLists.txt
438+
AND SYSTEM_ARCHITECTURE STREQUAL "x86_64"
439+
AND NOT MSVC
440+
AND NOT CMAKE_CROSSCOMPILING)
441+
# Report in the configure log that the bundled LLVM OpenMP from 3rdparty was selected.
message("Using bundled LLVM OpenMP")
441442
# Intel/llvm OpenMP: https://github.com/llvm-mirror/openmp
442443
set(OPENMP_STANDALONE_BUILD TRUE)
443444
set(LIBOMP_ENABLE_SHARED TRUE)
@@ -451,6 +452,7 @@ if(USE_OPENMP)
451452
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
452453
add_definitions(-DMXNET_USE_OPENMP=1)
453454
else()
455+
message("Using platform provided OpenMP")
454456
if(OPENMP_FOUND)
455457
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
456458
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")

docs/faq/env_var.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ $env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
3939

4040
## Set the Number of Threads
4141

42+
* MXNET_OMP_MAX_THREADS
43+
- Values: Int ```(default=Number of processors / Number of processors * 2 in X86)```
44+
- Maximum number of threads to use in individual operators through OpenMP. If not set, `OMP_NUM_THREADS` is consulted next.
4245
* MXNET_GPU_WORKER_NTHREADS
4346
- Values: Int ```(default=2)```
4447
- The maximum number of threads to use on each GPU. This parameter is used to parallelize the computation within a single GPU card.
@@ -47,7 +50,7 @@ $env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
4750
- The maximum number of concurrent threads that do the memory copy job on each GPU.
4851
* MXNET_CPU_WORKER_NTHREADS
4952
- Values: Int ```(default=1)```
50-
- The maximum number of scheduling threads on CPU. It specifies how many operators can be run in parallel. Note that most CPU operators are parallelized by OpenMP. To change the number of threads used by individual operators, please set `OMP_NUM_THREADS` instead.
53+
- The maximum number of scheduling threads on CPU. It specifies how many operators can be run in parallel. Note that most CPU operators are parallelized by OpenMP. To change the number of threads used by individual operators, please set `MXNET_OMP_MAX_THREADS` instead.
5154
* MXNET_CPU_PRIORITY_NTHREADS
5255
- Values: Int ```(default=4)```
5356
- The number of threads given to prioritized CPU jobs.
@@ -56,10 +59,13 @@ $env:MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0
5659
- The number of threads used for NNPACK. NNPACK package aims to provide high-performance implementations of some layers for multi-core CPUs. Checkout [NNPACK](http://mxnet.io/faq/nnpack.html) to know more about it.
5760
* MXNET_MP_WORKER_NTHREADS
5861
- Values: Int ```(default=1)```
59-
- The number of scheduling threads on CPU given to multiprocess workers. Enlarge this number allows more operators to run in parallel in individual workers but please consider reducing the overall `num_workers` to avoid thread contention (not available on Windows).
62+
- The number of scheduling threads on CPU given to multiprocess workers (after fork). Enlarging this number allows more operators to run in parallel in individual workers, but please consider reducing the overall `num_workers` to avoid thread contention (not available on Windows).
6063
* MXNET_MP_OPENCV_NUM_THREADS
6164
- Values: Int ```(default=0)```
6265
- The number of OpenCV execution threads given to multiprocess workers. OpenCV multithreading is disabled if `MXNET_MP_OPENCV_NUM_THREADS` < 1 (default). Enlarging this number may boost the performance of individual workers when executing underlying OpenCV functions, but please consider reducing the overall `num_workers` to avoid thread contention (not available on Windows).
66+
* MXNET_GPU_COPY_NTHREADS
67+
- Values: Int ```(default=2)```
68+
- Number of threads for copying data from CPU to GPU.
6369

6470
## Memory Options
6571

src/c_api/c_api.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,12 @@
4646
#include "mxnet/libinfo.h"
4747
#include "mxnet/imperative.h"
4848
#include "mxnet/lib_api.h"
49+
#include "../initialize.h"
4950
#include "./c_api_common.h"
5051
#include "../operator/custom/custom-inl.h"
5152
#include "../operator/tensor/matrix_op-inl.h"
5253
#include "../operator/tvmop/op_module.h"
5354
#include "../common/utils.h"
54-
#include "../common/library.h"
5555

5656
using namespace mxnet;
5757

@@ -95,7 +95,7 @@ inline int MXAPIGetFunctionRegInfo(const FunRegType *e,
9595
// Loads library and initializes it
9696
int MXLoadLib(const char *path) {
9797
API_BEGIN();
98-
void *lib = load_lib(path);
98+
void *lib = LibraryInitializer::Get()->lib_load(path);
9999
if (!lib)
100100
LOG(FATAL) << "Unable to load library";
101101

src/common/library.cc

Lines changed: 0 additions & 125 deletions
This file was deleted.

src/common/library.h

Lines changed: 0 additions & 57 deletions
This file was deleted.

src/common/utils.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,21 @@
5050
#include "../operator/nn/mkldnn/mkldnn_base-inl.h"
5151
#endif
5252

53+
#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
54+
#include <windows.h>
55+
#else
56+
#include <unistd.h>
57+
#endif
58+
59+
5360
namespace mxnet {
5461
namespace common {
5562

63+
#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
64+
/*! \brief Id of the calling process, obtained from the Win32 GetCurrentProcessId() call. */
inline size_t current_process_id() {
  const auto pid = ::GetCurrentProcessId();
  return pid;
}
65+
#else
66+
/*! \brief Id of the calling process, as returned by getpid(), converted to size_t. */
inline size_t current_process_id() {
  const auto pid = getpid();
  return pid;
}
67+
#endif
5668
/*!
5769
* \brief IndPtr should be non-negative, in non-decreasing order, start with 0
5870
* and end with value equal with size of indices.

src/engine/threaded_engine_perdevice.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <dmlc/parameter.h>
2929
#include <dmlc/concurrency.h>
3030
#include <dmlc/thread_group.h>
31+
#include "../initialize.h"
3132
#include "./threaded_engine.h"
3233
#include "./thread_pool.h"
3334
#include "../common/lazy_alloc_array.h"
@@ -76,7 +77,8 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
7677
void Start() override {
7778
if (is_worker_) return;
7879
gpu_worker_nthreads_ = common::GetNumThreadsPerGPU();
79-
cpu_worker_nthreads_ = dmlc::GetEnv("MXNET_CPU_WORKER_NTHREADS", 1);
80+
// MXNET_CPU_WORKER_NTHREADS
81+
cpu_worker_nthreads_ = LibraryInitializer::Get()->cpu_worker_nthreads_;
8082
gpu_copy_nthreads_ = dmlc::GetEnv("MXNET_GPU_COPY_NTHREADS", 2);
8183
// create CPU task
8284
int cpu_priority_nthreads = dmlc::GetEnv("MXNET_CPU_PRIORITY_NTHREADS", 4);

0 commit comments

Comments
 (0)